Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/lighteval/models/endpoints/litellm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class LiteLLMModelConfig(ModelConfig):
provider: str | None = None
base_url: str | None = None
api_key: str | None = None
+ concurrent_requests: int = 10


class LiteLLMClient(LightevalModel):
Expand All @@ -113,11 +114,11 @@ def __init__(self, config: LiteLLMModelConfig) -> None:
self.base_url = config.base_url
self.api_key = config.api_key
self.generation_parameters = config.generation_parameters
+ self.concurrent_requests = config.concurrent_requests

self.API_MAX_RETRY = 5
self.API_RETRY_SLEEP = 3
self.API_RETRY_MULTIPLIER = 2
- self.CONCURRENT_CALLS = 10 # 100 leads to hitting Anthropic rate limits

self._tokenizer = encode
self.pairwise_tokenization = False
Expand Down Expand Up @@ -229,7 +230,7 @@ def __call_api_parallel(
f"Length of prompts, return_logitss, max_new_tokenss, num_sampless, stop_sequences, system_prompts should be the same but are {len(prompts)}, {len(return_logitss)}, {len(max_new_tokenss)}, {len(num_sampless)}, {len(stop_sequencess)}"
)

- with ThreadPoolExecutor(self.CONCURRENT_CALLS) as executor:
+ with ThreadPoolExecutor(self.concurrent_requests) as executor:
for entry in tqdm(
executor.map(
self.__call_api,
Expand Down
Loading