diff --git a/examples/model_configs/litellm_model.yaml b/examples/model_configs/litellm_model.yaml
index b0cb25199..0c5756a16 100644
--- a/examples/model_configs/litellm_model.yaml
+++ b/examples/model_configs/litellm_model.yaml
@@ -2,6 +2,7 @@ model_parameters:
   model_name: "openai/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
   provider: "openai"
   base_url: "https://router.huggingface.co/hf-inference/v1"
+  concurrent_requests: 10 # Configure the number of concurrent API requests
   generation_parameters:
     temperature: 0.5
     max_new_tokens: 256
diff --git a/src/lighteval/models/abstract_model.py b/src/lighteval/models/abstract_model.py
index 81d725e6a..8979c5a8b 100644
--- a/src/lighteval/models/abstract_model.py
+++ b/src/lighteval/models/abstract_model.py
@@ -56,6 +56,10 @@ class ModelConfig(BaseModel, extra="forbid"):
             behavior and context for the model during evaluation.
         cache_dir (str):
            Directory to cache the model. Defaults to "~/.cache/huggingface/lighteval".
+        concurrent_requests (int):
+            Maximum number of concurrent API requests to execute in parallel.
+            Higher values can improve throughput for batch processing but may hit rate limits
+            or exhaust API quotas faster. Default is 10.

     Methods:
         from_path(path: str):
@@ -87,6 +91,8 @@ class ModelConfig(BaseModel, extra="forbid"):
     generation_parameters: GenerationParameters = GenerationParameters()
     system_prompt: str | None = None
     cache_dir: str = "~/.cache/huggingface/lighteval"
+    concurrent_requests: int = 10
+

     @classmethod
     def from_path(cls, path: str):
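
Note: this diff only adds the `concurrent_requests` field to the config; the code that consumes it is not shown here. The sketch below illustrates, under stated assumptions, the usual pattern such a setting controls: an `asyncio.Semaphore` caps how many API calls are in flight at once. The `generate_batch` and `_call_api` names are hypothetical and are not part of lighteval.

```python
import asyncio


async def _call_api(prompt: str) -> str:
    # Hypothetical stand-in for a real provider request (e.g. an HTTP POST).
    await asyncio.sleep(0.1)
    return f"response for: {prompt}"


async def generate_batch(prompts: list[str], concurrent_requests: int = 10) -> list[str]:
    # The semaphore bounds the number of requests running at the same time,
    # mirroring what a `concurrent_requests` setting is typically used for.
    semaphore = asyncio.Semaphore(concurrent_requests)

    async def bounded_call(prompt: str) -> str:
        # Only `concurrent_requests` coroutines can pass this point at once;
        # the rest of the batch waits for a slot to free up.
        async with semaphore:
            return await _call_api(prompt)

    return await asyncio.gather(*(bounded_call(p) for p in prompts))


if __name__ == "__main__":
    results = asyncio.run(
        generate_batch([f"prompt {i}" for i in range(25)], concurrent_requests=10)
    )
    print(len(results))
```

With this pattern, raising `concurrent_requests` increases throughput until the provider's rate limit becomes the bottleneck, which is the trade-off described in the docstring above.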