@@ -298,14 +298,14 @@ def mock_iter(self):
     def test_run_serverless(self):
         generator = HuggingFaceAPIGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
-            api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"},
+            api_params={"model": "microsoft/Phi-3.5-mini-instruct"},
             generation_kwargs={"max_new_tokens": 20},
         )
 
         # You must include the instruction tokens in the prompt. HF does not add them automatically.
         # Without them the model will behave erratically.
         response = generator.run(
-            "<s>[INST] What is the capital of France? Be concise only provide the capital, nothing else.[/INST]"
+            "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n"
         )
 
         # Assert that the response contains the generated replies
@@ -329,12 +329,14 @@ def test_run_serverless(self):
     def test_live_run_streaming_check_completion_start_time(self):
         generator = HuggingFaceAPIGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
-            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
+            api_params={"model": "microsoft/Phi-3.5-mini-instruct"},
             generation_kwargs={"max_new_tokens": 30},
             streaming_callback=streaming_callback_handler,
         )
 
-        results = generator.run("You are a helpful agent that answers questions. What is the capital of France?")
+        results = generator.run(
+            "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n"
+        )
 
         # Assert that the response contains the generated replies
         assert "replies" in results
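
The prompts added in this diff hard-code Phi-3.5's chat-format tokens. As a point of reference (not part of this change), the same string can normally be produced from the model's own chat template. The sketch below assumes the transformers package is installed and that the tokenizer for microsoft/Phi-3.5-mini-instruct can be downloaded:

# Sketch only, not part of the diff above: build the prompt from the model's
# chat template instead of hard-coding the "<|user|>"/"<|end|>" tokens.
# Assumes `transformers` is installed and the Phi-3.5 tokenizer is reachable.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
messages = [
    {
        "role": "user",
        "content": "What is the capital of France? Be concise only provide the capital, nothing else.",
    }
]
# tokenize=False returns the formatted string; add_generation_prompt=True
# appends the assistant marker so the model starts answering immediately.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Expected to match the hard-coded string used in the tests above, e.g.
# "<|user|>\nWhat is the capital of France? ...<|end|>\n<|assistant|>\n"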