@@ -822,24 +822,26 @@ async def test_run_async_with_tools(self, tools, mock_check_valid_model):
822822 reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
823823 )
824824 @pytest.mark.flaky(reruns=3, reruns_delay=10)
825+ @pytest.mark.asyncio
825826 async def test_live_run_async_serverless(self):
826827 generator = HuggingFaceAPIChatGenerator(
827828 api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
828829 api_params={"model": "microsoft/Phi-3.5-mini-instruct"},
829830 generation_kwargs={"max_tokens": 20},
830831 )
831832
832- # No need for instruction tokens here since we use the chat_completion endpoint which handles the chat
833- # templating for us.
834833 messages = [
835834 ChatMessage.from_user("What is the capital of France? Be concise only provide the capital, nothing else.")
836835 ]
837- response = await generator.run_async(messages=messages)
836+ try:
837+ response = await generator.run_async(messages=messages)
838838
839- assert "replies" in response
840- assert isinstance(response["replies"], list)
841- assert len(response["replies"]) > 0
842- assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
843- assert "usage" in response["replies"][0].meta
844- assert "prompt_tokens" in response["replies"][0].meta["usage"]
845- assert "completion_tokens" in response["replies"][0].meta["usage"]
839+ assert "replies" in response
840+ assert isinstance(response["replies"], list)
841+ assert len(response["replies"]) > 0
842+ assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
843+ assert "usage" in response["replies"][0].meta
844+ assert "prompt_tokens" in response["replies"][0].meta["usage"]
845+ assert "completion_tokens" in response["replies"][0].meta["usage"]
846+ finally:
847+ await generator._async_client.close()
0 commit comments