     InferenceClient,
     InferenceEndpoint,
     InferenceEndpointTimeoutError,
+    TextGenerationOutput,
     create_inference_endpoint,
     get_inference_endpoint,
 )
-from huggingface_hub.inference._text_generation import TextGenerationResponse
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoTokenizer
@@ -148,7 +148,7 @@ def max_length(self):

     def __async_process_request(
         self, context: str, stop_tokens: list[str], max_tokens: int
-    ) -> Coroutine[None, list[TextGenerationResponse], str]:
+    ) -> Coroutine[None, list[TextGenerationOutput], str]:
         # Todo: add an option to launch with conversational instead for chat prompts
         # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
         generated_text = self.async_client.text_generation(
@@ -162,7 +162,7 @@ def __async_process_request(

         return generated_text

-    def __process_request(self, context: str, stop_tokens: list[str], max_tokens: int) -> TextGenerationResponse:
+    def __process_request(self, context: str, stop_tokens: list[str], max_tokens: int) -> TextGenerationOutput:
         # Todo: add an option to launch with conversational instead for chat prompts
         # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
         generated_text = self.client.text_generation(
@@ -179,7 +179,7 @@ def __process_request(self, context: str, stop_tokens: list[str], max_tokens: in
     async def __async_process_batch_generate(
         self,
         requests: list[GreedyUntilRequest | GreedyUntilWithLogitsRequest],
-    ) -> list[TextGenerationResponse]:
+    ) -> list[TextGenerationOutput]:
         return await asyncio.gather(
             *[
                 self.__async_process_request(
@@ -194,7 +194,7 @@ async def __async_process_batch_generate(
     def __process_batch_generate(
         self,
         requests: list[GreedyUntilRequest | GreedyUntilWithLogitsRequest],
-    ) -> list[TextGenerationResponse]:
+    ) -> list[TextGenerationOutput]:
         return [
             self.__process_request(
                 context=request.context,
@@ -206,7 +206,7 @@ def __process_batch_generate(

     async def __async_process_batch_logprob(
         self, requests: list[LoglikelihoodRequest], rolling: bool = False
-    ) -> list[TextGenerationResponse]:
+    ) -> list[TextGenerationOutput]:
         return await asyncio.gather(
             *[
                 self.__async_process_request(
@@ -220,7 +220,7 @@ async def __async_process_batch_logprob(

     def __process_batch_logprob(
         self, requests: list[LoglikelihoodRequest], rolling: bool = False
-    ) -> list[TextGenerationResponse]:
+    ) -> list[TextGenerationOutput]:
         return [
             self.__process_request(
                 context=request.context if rolling else request.context + request.choice,
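For context, `TextGenerationOutput` is the type that `huggingface_hub` exposes at the top level (as imported in the hunk above) and that `InferenceClient.text_generation` returns when called with `details=True`; the async client yields the same type from a coroutine. A minimal sketch of that call pattern, assuming a reachable text-generation endpoint (the URL, prompt, and generation parameters below are illustrative placeholders, not values from this diff):

from huggingface_hub import InferenceClient, TextGenerationOutput

# Hypothetical endpoint URL, for illustration only.
client = InferenceClient(model="http://localhost:8080")

# With details=True, text_generation returns a TextGenerationOutput
# (generated_text plus token-level details) instead of a plain string.
out: TextGenerationOutput = client.text_generation(
    "The quick brown fox",
    max_new_tokens=16,
    stop_sequences=["\n"],
    details=True,
)
print(out.generated_text)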