3737import time
3838from dataclasses import dataclass
3939
40+ from inference_endpoint .async_utils .transport .zmq .context import ManagedZMQContext
4041from inference_endpoint .core .types import Query , QueryResult
4142from inference_endpoint .endpoint_client .config import HTTPClientConfig
4243from inference_endpoint .endpoint_client .cpu_affinity import compute_affinity_plan
@@ -399,6 +400,7 @@ def _create_client(
399400 prompt : str ,
400401 enable_affinity : bool ,
401402 verbose : bool = True ,
403+ zmq_context : ManagedZMQContext | None = None ,
402404) -> tuple :
403405 """Create an endpoint client and query data dict.
404406
@@ -422,7 +424,7 @@ def _create_client(
422424 endpoint_urls = [endpoint_url ],
423425 num_workers = num_workers if num_workers > 0 else - 1 ,
424426 max_connections = max_connections if max_connections > 0 else - 1 ,
425- warmup_connections = False ,
427+ warmup_connections = 0 ,
426428 worker_gc_mode = "relaxed" ,
427429 log_level = "CRITICAL" ,
428430 cpu_affinity = cpu_affinity_plan ,
@@ -434,7 +436,7 @@ def _create_client(
434436 f"max_connections={ config .max_connections } , stream={ streaming } "
435437 )
436438
437- client = AsyncHttpEndpointClient (config )
439+ client = AsyncHttpEndpointClient (config , zmq_context = zmq_context )
438440 query_data = {
439441 "prompt" : prompt ,
440442 "model" : "benchmark-model" ,
@@ -488,13 +490,17 @@ def run_benchmark(
488490 except OSError :
489491 pass
490492
493+ zmq_ctx_manager = ManagedZMQContext .scoped ()
494+ zmq_ctx = zmq_ctx_manager .__enter__ ()
495+
491496 client , query_data = _create_client (
492497 endpoint_url ,
493498 num_workers ,
494499 max_connections ,
495500 streaming ,
496501 prompt ,
497502 enable_affinity ,
503+ zmq_context = zmq_ctx ,
498504 )
499505 loop = client .loop
500506 stats = BenchmarkStats (sse_events_per_response = sse_events_per_response )
@@ -613,6 +619,7 @@ async def receiver():
613619 gc .collect ()
614620
615621 asyncio .run_coroutine_threadsafe (client .shutdown (), loop ).result (timeout = 10.0 )
622+ zmq_ctx_manager .__exit__ (None , None , None )
616623
617624 # Restore original affinity so the next sweep iteration sees all CPUs
618625 if saved_affinity is not None :
0 commit comments