Skip to content

Commit b0edbc3

Browse files
dameikle and NathanHB
authored
Added ability to configure concurrent_requests in litellm_model.py (#911)
* Added ability to configure concurrent_requests in litellm_model.py
* Update src/lighteval/models/endpoints/litellm_model.py (Co-authored-by: Nathan Habib <[email protected]>)
* Updated docstring for new concurrent_requests parameter in LiteLLMModelConfig

Co-authored-by: Nathan Habib <[email protected]>
1 parent 76e5aff commit b0edbc3

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

src/lighteval/models/endpoints/litellm_model.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,18 @@ class LiteLLMModelConfig(ModelConfig):
7878
api_key (str | None):
7979
API key for authentication. If None, reads from environment variables.
8080
Environment variable names are provider-specific (e.g., OPENAI_API_KEY).
81+
concurrent_requests (int):
82+
Maximum number of concurrent API requests to execute in parallel.
83+
Higher values can improve throughput for batch processing but may hit rate limits
84+
or exhaust API quotas faster. Default is 10.
8185
8286
Example:
8387
```python
8488
config = LiteLLMModelConfig(
8589
model_name="gpt-4",
8690
provider="openai",
8791
base_url="https://api.openai.com/v1",
92+
concurrent_requests=5,
8893
generation_parameters=GenerationParameters(
8994
temperature=0.7,
9095
max_new_tokens=100
@@ -97,6 +102,7 @@ class LiteLLMModelConfig(ModelConfig):
97102
provider: str | None = None
98103
base_url: str | None = None
99104
api_key: str | None = None
105+
concurrent_requests: int = 10
100106

101107

102108
class LiteLLMClient(LightevalModel):
@@ -113,11 +119,11 @@ def __init__(self, config: LiteLLMModelConfig) -> None:
113119
self.base_url = config.base_url
114120
self.api_key = config.api_key
115121
self.generation_parameters = config.generation_parameters
122+
self.concurrent_requests = config.concurrent_requests
116123

117124
self.API_MAX_RETRY = 5
118125
self.API_RETRY_SLEEP = 3
119126
self.API_RETRY_MULTIPLIER = 2
120-
self.CONCURRENT_CALLS = 10 # 100 leads to hitting Anthropic rate limits
121127

122128
self._tokenizer = encode
123129
self.pairwise_tokenization = False
@@ -229,7 +235,7 @@ def __call_api_parallel(
229235
f"Length of prompts, return_logitss, max_new_tokenss, num_sampless, stop_sequences, system_prompts should be the same but are {len(prompts)}, {len(return_logitss)}, {len(max_new_tokenss)}, {len(num_sampless)}, {len(stop_sequencess)}"
230236
)
231237

232-
with ThreadPoolExecutor(self.CONCURRENT_CALLS) as executor:
238+
with ThreadPoolExecutor(self.concurrent_requests) as executor:
233239
for entry in tqdm(
234240
executor.map(
235241
self.__call_api,

0 commit comments

Comments (0)