huggingface · NathanHB · Aug 14, 2025 · Aug 9, 2025 · Aug 11, 2025 · Aug 12, 2025
diff --git a/src/lighteval/models/endpoints/litellm_model.py b/src/lighteval/models/endpoints/litellm_model.py
@@ -97,6 +97,7 @@ class LiteLLMModelConfig(ModelConfig):
     provider: str | None = None
     base_url: str | None = None
     api_key: str | None = None
+    concurrent_requests: int = 10
 
 
 class LiteLLMClient(LightevalModel):
@@ -113,11 +114,11 @@ def __init__(self, config: LiteLLMModelConfig) -> None:
         self.base_url = config.base_url
         self.api_key = config.api_key
         self.generation_parameters = config.generation_parameters
+        self.concurrent_requests = config.concurrent_requests
 
         self.API_MAX_RETRY = 5
         self.API_RETRY_SLEEP = 3
         self.API_RETRY_MULTIPLIER = 2
-        self.CONCURRENT_CALLS = 10  # 100 leads to hitting Anthropic rate limits
 
         self._tokenizer = encode
         self.pairwise_tokenization = False
@@ -229,7 +230,7 @@ def __call_api_parallel(
             f"Length of prompts, return_logitss, max_new_tokenss, num_sampless, stop_sequences, system_prompts should be the same but are {len(prompts)}, {len(return_logitss)}, {len(max_new_tokenss)}, {len(num_sampless)}, {len(stop_sequencess)}"
         )
 
-        with ThreadPoolExecutor(self.CONCURRENT_CALLS) as executor:
+        with ThreadPoolExecutor(self.concurrent_requests) as executor:
             for entry in tqdm(
                 executor.map(
                     self.__call_api,