@@ -570,11 +570,15 @@ def test_run_with_tools(self, mock_check_valid_model, tools):
570570 def test_live_run_serverless (self ):
571571 generator = HuggingFaceAPIChatGenerator (
572572 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
573- api_params = {"model" : "HuggingFaceH4/zephyr-7b-beta " },
573+ api_params = {"model" : "mistralai/Mistral-7B-Instruct-v0.3 " },
574574 generation_kwargs = {"max_tokens" : 20 },
575575 )
576576
577- messages = [ChatMessage .from_user ("What is the capital of France?" )]
577+ # No need for instruction tokens here since we use the chat_completion endpoint which handles the chat
578+ # templating for us.
579+ messages = [
580+ ChatMessage .from_user ("What is the capital of France? Be concise only provide the capital, nothing else." )
581+ ]
578582 response = generator .run (messages = messages )
579583
580584 assert "replies" in response
@@ -594,12 +598,16 @@ def test_live_run_serverless(self):
594598 def test_live_run_serverless_streaming (self ):
595599 generator = HuggingFaceAPIChatGenerator (
596600 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
597- api_params = {"model" : "HuggingFaceH4/zephyr-7b-beta " },
601+ api_params = {"model" : "mistralai/Mistral-7B-Instruct-v0.3 " },
598602 generation_kwargs = {"max_tokens" : 20 },
599603 streaming_callback = streaming_callback_handler ,
600604 )
601605
602- messages = [ChatMessage .from_user ("What is the capital of France?" )]
606+ # No need for instruction tokens here since we use the chat_completion endpoint which handles the chat
607+ # templating for us.
608+ messages = [
609+ ChatMessage .from_user ("What is the capital of France? Be concise only provide the capital, nothing else." )
610+ ]
603611 response = generator .run (messages = messages )
604612
605613 assert "replies" in response
@@ -817,11 +825,15 @@ async def test_run_async_with_tools(self, tools, mock_check_valid_model):
817825 async def test_live_run_async_serverless (self ):
818826 generator = HuggingFaceAPIChatGenerator (
819827 api_type = HFGenerationAPIType .SERVERLESS_INFERENCE_API ,
820- api_params = {"model" : "HuggingFaceH4/zephyr-7b-beta " },
828+ api_params = {"model" : "mistralai/Mistral-7B-Instruct-v0.3 " },
821829 generation_kwargs = {"max_tokens" : 20 },
822830 )
823831
824- messages = [ChatMessage .from_user ("What is the capital of France?" )]
832+ # No need for instruction tokens here since we use the chat_completion endpoint which handles the chat
833+ # templating for us.
834+ messages = [
835+ ChatMessage .from_user ("What is the capital of France? Be concise only provide the capital, nothing else." )
836+ ]
825837 response = await generator .run_async (messages = messages )
826838
827839 assert "replies" in response
0 commit comments