|
4 | 4 | from typing import Any, Dict, Optional, TypedDict, Union |
5 | 5 |
|
6 | 6 | from celery import Celery, Task, states |
| 7 | +from gevent import monkey |
7 | 8 | from model_engine_server.common.constants import DEFAULT_CELERY_TASK_NAME, LIRA_CELERY_TASK_NAME |
8 | 9 | from model_engine_server.common.dtos.model_endpoints import BrokerType |
9 | 10 | from model_engine_server.common.dtos.tasks import EndpointPredictV1Request |
|
23 | 24 | from model_engine_server.inference.infra.gateways.datadog_inference_monitoring_metrics_gateway import ( |
24 | 25 | DatadogInferenceMonitoringMetricsGateway, |
25 | 26 | ) |
26 | | -from requests import ConnectionError |
| 27 | +from request import ConnectionError |
| 28 | + |
| 29 | +monkey.patch_all() |
27 | 30 |
|
28 | 31 | logger = make_logger(logger_name()) |
29 | 32 |
|
@@ -144,7 +147,7 @@ def exec_func(payload, arrival_timestamp, *ignored_args, **ignored_kwargs): |
144 | 147 | # Don't fail the celery task even if there's a status code |
145 | 148 | # (otherwise we can't really control what gets put in the result attribute) |
146 | 149 | # in the task (https://docs.celeryq.dev/en/stable/reference/celery.result.html#celery.result.AsyncResult.status) |
147 | | - result = forwarder(payload) |
| 150 | + result = forwarder.forward(payload) |
148 | 151 | request_duration = datetime.now() - arrival_timestamp |
149 | 152 | if request_duration > timedelta(seconds=DEFAULT_TASK_VISIBILITY_SECONDS): |
150 | 153 | monitoring_metrics_gateway.emit_async_task_stuck_metric(queue_name) |
@@ -177,12 +180,7 @@ def start_celery_service( |
177 | 180 | concurrency=concurrency, |
178 | 181 | loglevel="INFO", |
179 | 182 | optimization="fair", |
180 | | - # Don't use pool="solo" so we can send multiple concurrent requests over |
181 | | - # Historically, pool="solo" argument fixes the known issues of celery and some of the libraries. |
182 | | - # Particularly asyncio and torchvision transformers. This isn't relevant since celery-forwarder |
183 | | - # is quite lightweight |
184 | | - # TODO: we should probably use eventlet or gevent for the pool, since |
185 | | - # the forwarder is nearly the most extreme example of IO bound. |
| 183 | + pool="gevent", |
186 | 184 | ) |
187 | 185 | worker.start() |
188 | 186 |
|
|
0 commit comments