|
28 | 28 |
|
29 | 29 | from tqdm import tqdm |
30 | 30 |
|
| 31 | +from lighteval.models.model_output import ModelResponse |
31 | 32 | from lighteval.utils.imports import is_litellm_available, is_openai_available, is_vllm_available |
32 | 33 |
|
33 | 34 |
|
@@ -195,20 +196,30 @@ def __call_litellm(self, prompts): |
def __call_api(prompt):
    """Send one judge prompt to litellm, with retries and a cache-bypass fallback.

    Tries up to ``self.API_MAX_RETRY`` times. Each attempt first issues a cached
    completion; if the returned text is empty (or the response is flagged as
    failed), it retries once with ``caching=False`` before giving up on that
    attempt. Exhausting all retries yields a failure sentinel instead of raising,
    so one bad prompt cannot abort the whole batch.

    Args:
        prompt: chat-style message list passed straight to ``litellm.completion``.

    Returns:
        str: the judge's reply text on success, or
        ModelResponse: a sentinel with ``failed=True`` when every attempt failed.
        NOTE(review): callers must handle both return types — confirm downstream
        code tolerates the ModelResponse sentinel where it expects a string.
    """
    for _ in range(self.API_MAX_RETRY):
        try:
            kwargs = {
                "model": self.model,
                "messages": prompt,
                "response_format": {"type": "text"},
                "max_tokens": 512,
                "n": 1,
                "caching": True,
            }
            response = litellm.completion(**kwargs)
            text = response.choices[0].message.content
            # litellm's response object does not guarantee a `failed` attribute;
            # a bare `response.failed` raises AttributeError, which the broad
            # `except` below would silently swallow and burn a retry even for a
            # perfectly good response. getattr treats "no flag" as "not failed".
            if not text or getattr(response, "failed", False):
                # Empty/failed cached answer: retry once bypassing the cache,
                # in case a bad response was cached earlier.
                kwargs["caching"] = False
                response = litellm.completion(**kwargs)
                text = response.choices[0].message.content
                if not text or getattr(response, "failed", False):
                    # Just return an error response if the second attempt fails too
                    return ModelResponse(
                        text="Failed to get response from the API.", model=self.model, failed=True
                    )
            return text
        except Exception as e:
            logger.warning(f"{type(e), e}")
            time.sleep(self.API_RETRY_SLEEP)
    # All retries exhausted: return a failure sentinel rather than raising, so a
    # single stubborn prompt does not crash the surrounding thread-pool map.
    return ModelResponse(text="Failed to get response from the API.", model=self.model, failed=True)
212 | 223 |
|
213 | 224 | results = [] |
214 | 225 | with ThreadPoolExecutor(100) as executor: |
|
0 commit comments