Skip to content

Commit 28e3b0d

Browse files
Increase default concurrency to 100 for http forwarder (#552)
* Increase default concurrency to 50 for the HTTP forwarder
* Raise it further, to 100
* Add code coverage for the new default
1 parent d9e9623 commit 28e3b0d

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

model-engine/model_engine_server/inference/forwarding/http_forwarder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def get_streaming_forwarder_loader():
5151
@lru_cache()
def get_concurrency_limiter():
    """Return the process-wide concurrency limiter for the HTTP forwarder.

    Cached via ``lru_cache`` so every caller shares a single
    ``MultiprocessingConcurrencyLimiter`` instance. The limit comes from the
    forwarder config key ``max_concurrency`` (default 100), and requests
    beyond the limit fail fast rather than queue.
    """
    cfg = get_config()
    # Config values may arrive as strings; coerce to int explicitly.
    max_conc = int(cfg.get("max_concurrency", 100))
    return MultiprocessingConcurrencyLimiter(
        concurrency=max_conc,
        fail_on_concurrency_limit=True,
    )

model-engine/tests/unit/inference/test_http_forwarder.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from model_engine_server.inference.forwarding.forwarding import Forwarder
1111
from model_engine_server.inference.forwarding.http_forwarder import (
1212
MultiprocessingConcurrencyLimiter,
13+
get_concurrency_limiter,
1314
predict,
1415
)
1516
from model_engine_server.inference.infra.gateways.datadog_inference_monitoring_metrics_gateway import (
@@ -57,6 +58,32 @@ def json(self) -> dict:
5758
return mocked_static_json()
5859

5960

def mocked_get_config():
    """Stand-in for ``http_forwarder.get_config`` in unit tests.

    Returns a static forwarder configuration with both a sync and a stream
    endpoint pointed at localhost, plus a distinctive ``max_concurrency`` of
    42 so tests can verify the value is actually read from config.
    """
    sync_cfg = {
        "user_port": 5005,
        "user_hostname": "localhost",
        "use_grpc": False,
        "predict_route": "/predict",
        "healthcheck_route": "/readyz",
        "batch_route": None,
        "model_engine_unwrap": True,
        "serialize_results_as_string": True,
        "forward_http_status": True,
    }
    stream_cfg = {
        "user_port": 5005,
        "user_hostname": "localhost",
        "predict_route": "/stream",
        "healthcheck_route": "/readyz",
        "batch_route": None,
        "model_engine_unwrap": True,
        "serialize_results_as_string": False,
    }
    return {
        "sync": sync_cfg,
        "stream": stream_cfg,
        "max_concurrency": 42,
    }
85+
86+
6087
@pytest.fixture
6188
def post_inference_hooks_handler():
6289
handler = PostInferenceHooksHandler(
@@ -108,6 +135,13 @@ def mock_request():
108135
)
109136

110137

@mock.patch("model_engine_server.inference.forwarding.http_forwarder.get_config", mocked_get_config)
def test_get_concurrency_limiter():
    """The limiter should take its concurrency (42) from the mocked config."""
    result = get_concurrency_limiter()
    assert isinstance(result, MultiprocessingConcurrencyLimiter)
    assert result.concurrency == 42
144+
111145
@mock.patch("requests.post", mocked_post)
112146
@mock.patch("requests.get", mocked_get)
113147
def test_http_service_429(mock_request, post_inference_hooks_handler):

0 commit comments

Comments (0)