@@ -24,7 +24,6 @@
 import logging
 from typing import Any, List, Optional
 
-import yaml
 from huggingface_hub import AsyncInferenceClient, ChatCompletionOutput
 from huggingface_hub.errors import HfHubHTTPError
 from pydantic import NonNegativeInt
@@ -35,7 +34,6 @@
 from lighteval.data import GenerativeTaskDataset
 from lighteval.models.abstract_model import LightevalModel
 from lighteval.models.endpoints.endpoint_model import ModelInfo
-from lighteval.models.model_input import GenerationParameters
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -72,26 +70,6 @@ class InferenceProvidersModelConfig(ModelConfig):
     org_to_bill: str | None = None
     parallel_calls_count: NonNegativeInt = 10
 
-    @classmethod
-    def from_path(cls, path):
-        with open(path, "r") as f:
-            config = yaml.safe_load(f)["model"]
-
-        model_name = config["model_name"]
-        provider = config.get("provider", None)
-        timeout = config.get("timeout", None)
-        proxies = config.get("proxies", None)
-        org_to_bill = config.get("org_to_bill", None)
-        generation_parameters = GenerationParameters.from_dict(config)
-        return cls(
-            model=model_name,
-            provider=provider,
-            timeout=timeout,
-            proxies=proxies,
-            org_to_bill=org_to_bill,
-            generation_parameters=generation_parameters,
-        )
-
 
 class InferenceProvidersClient(LightevalModel):
     """Client for making inference requests to various providers using the HuggingFace Inference API.
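Note on the removed `from_path`: a config that used to be loaded from a YAML file can still be built by parsing the file and constructing `InferenceProvidersModelConfig` directly. The following is a minimal sketch, not part of this commit; it assumes the constructor keeps the keyword names used by the deleted classmethod, that `GenerationParameters.from_dict` is still importable from `lighteval.models.model_input`, and the import path of the config class is a guess.

```python
# Sketch only: replicate the deleted from_path() in caller code.
import yaml

from lighteval.models.model_input import GenerationParameters
from lighteval.models.endpoints.inference_providers_model import (  # import path assumed
    InferenceProvidersModelConfig,
)


def config_from_yaml(path: str) -> InferenceProvidersModelConfig:
    """Parse the `model` section of a YAML file and build the config directly."""
    with open(path, "r") as f:
        config = yaml.safe_load(f)["model"]
    return InferenceProvidersModelConfig(
        model=config["model_name"],  # keyword names mirror the removed classmethod
        provider=config.get("provider"),
        timeout=config.get("timeout"),
        proxies=config.get("proxies"),
        org_to_bill=config.get("org_to_bill"),
        generation_parameters=GenerationParameters.from_dict(config),
    )
```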
@@ -198,7 +176,6 @@ async def bounded_api_call(prompt, num_samples):
     def greedy_until(
         self,
         requests: list[GreedyUntilRequest],
-        override_bs: Optional[int] = None,
     ) -> list[GenerativeResponse]:
         """
         Generates responses using a greedy decoding strategy until certain ending conditions are met.
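Call sites change accordingly: the per-call `override_bs` hint is gone, and concurrency is driven by `parallel_calls_count` on the config (kept in the hunk above) together with the `bounded_api_call` helper named in the hunk header. A hedged sketch of the new call shape, assuming `model` is an initialized `InferenceProvidersClient` and `requests` is an already-prepared list of `GreedyUntilRequest` objects:

```python
# Sketch only: the batch-size argument no longer exists.
# Before: responses = model.greedy_until(requests, override_bs=1)
responses = model.greedy_until(requests)
for response in responses:
    # `result` is assumed here to carry the generated text of a GenerativeResponse.
    print(response.result)
```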
@@ -256,22 +233,18 @@ def max_length(self) -> int:
         logger.warning("Tokenizer was not correctly loaded. Max model context length is assumed to be 30K tokens")
         return 30000
 
-    def loglikelihood(
-        self, requests: list[LoglikelihoodRequest], override_bs: Optional[int] = None
-    ) -> list[LoglikelihoodResponse]:
+    def loglikelihood(self, requests: list[LoglikelihoodRequest]) -> list[LoglikelihoodResponse]:
         """Tokenize the context and continuation and compute the log likelihood of those
         tokenized sequences.
         """
         raise NotImplementedError
 
-    def loglikelihood_rolling(
-        self, requests: list[LoglikelihoodRollingRequest], override_bs: Optional[int] = None
-    ) -> list[LoglikelihoodResponse]:
+    def loglikelihood_rolling(self, requests: list[LoglikelihoodRollingRequest]) -> list[LoglikelihoodResponse]:
         """This function is used to compute the log likelihood of the context for perplexity metrics."""
         raise NotImplementedError
 
     def loglikelihood_single_token(
-        self, requests: list[LoglikelihoodSingleTokenRequest], override_bs: Optional[int] = None
+        self, requests: list[LoglikelihoodSingleTokenRequest]
     ) -> list[LoglikelihoodSingleTokenResponse]:
         """Tokenize the context and continuation and compute the log likelihood of those
         tokenized sequences.
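The three log-likelihood methods keep raising `NotImplementedError`, so this backend remains generation-only. A caller-side guard like the following (an illustrative sketch, not part of lighteval) makes that limitation explicit instead of letting the bare exception propagate:

```python
# Illustrative sketch: fail fast with a readable message for loglikelihood-based tasks.
def loglikelihood_or_explain(model, requests):
    try:
        return model.loglikelihood(requests)
    except NotImplementedError as err:
        raise RuntimeError(
            "InferenceProvidersClient does not implement loglikelihood, "
            "loglikelihood_rolling, or loglikelihood_single_token; "
            "use a generative metric or a different model backend."
        ) from err
```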