@@ -27,18 +27,11 @@ HTTP client for LLM inference with multiprocessing workers and ZMQ communication
2727## Usage
2828
2929``` python
30- from inference_endpoint.endpoint_client import (
31- HTTPEndpointClient,
32- HTTPClientConfig,
33- AioHttpConfig,
34- ZMQConfig,
35- )
30+ from inference_endpoint.endpoint_client import HTTPEndpointClient, HTTPClientConfig
3631from inference_endpoint.core.types import Query
3732
3833client = HTTPEndpointClient(
39- HTTPClientConfig(endpoint_url = " http://localhost:8000/v1/completions" , num_workers = 2 ),
40- AioHttpConfig(),
41- ZMQConfig(),
34+ HTTPClientConfig(endpoint_url = "http://localhost:8000/v1/completions")
4235)
4336
4437# Sync issue (fire-and-forget)
@@ -64,19 +57,12 @@ if response:
6457### With HttpClientSampleIssuer
6558
6659``` python
67- from inference_endpoint.endpoint_client import (
68- HTTPEndpointClient,
69- HTTPClientConfig,
70- AioHttpConfig,
71- ZMQConfig,
72- )
60+ from inference_endpoint.endpoint_client import HTTPEndpointClient, HTTPClientConfig
7361from inference_endpoint.endpoint_client.http_sample_issuer import HttpClientSampleIssuer
7462from inference_endpoint.load_generator.sample import Sample
7563
7664client = HTTPEndpointClient(
77- HTTPClientConfig(endpoint_url = " http://localhost:8000/v1/completions" , num_workers = 4 ),
78- AioHttpConfig(),
79- ZMQConfig(),
65+ HTTPClientConfig(endpoint_url = "http://localhost:8000/v1/completions", num_workers = 4)
8066)
8167issuer = HttpClientSampleIssuer(client)
8268
@@ -86,18 +72,6 @@ issuer.issue(Sample(
8672))
8773```
8874
89- ## Configuration
90-
91- ``` python
92- HTTPClientConfig(
93- endpoint_url = " http://localhost:8000/v1/completions" ,
94- num_workers = 4 , # Number of worker processes
95- )
96-
97- AioHttpConfig() # Socket, TCP, HTTP configs (use defaults)
98- ZMQConfig() # IPC configs (use defaults)
99- ```
100-
10175## Shutdown
10276
10377Shutdown is optional. Workers and event loop thread are daemons - they terminate automatically with the main process.