diff --git a/tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py b/tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py
index b835f0677cfe..79d9e9f0a595 100644
--- a/tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py
+++ b/tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py
@@ -16,6 +16,7 @@
 import unittest
 
 import pytest
+import torch
 from parameterized import parameterized
 
 from transformers import is_torch_available
@@ -110,10 +111,12 @@ def tearDown(self):
     def test_model_generation(self):
         # we will compele this when model file change over
         # pass
-        EXPECTED_ANSWER = "\nOkay, I need to write a short summary about the benefits of regular exercise. Let me start by recalling what I know. First,"
+        EXPECTED_ANSWER = "\nOkay, I need to write a"
         prompt = "Write a short summary of the benefits of regular exercise"
         tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct")
-        model = AutoModelForCausalLM.from_pretrained("tencent/Hunyuan-A13B-Instruct", device_map="auto")
+        model = AutoModelForCausalLM.from_pretrained(
+            "tencent/Hunyuan-A13B-Instruct", device_map="auto", dtype=torch.bfloat16
+        )
         messages = [
             {"role": "user", "content": prompt},
         ]
@@ -123,7 +126,7 @@ def test_model_generation(self):
             add_generation_prompt=True,
             return_tensors="pt",
         )
-        generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=30, top_k=1)
+        generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=10, top_k=1)
         text = tokenizer.decode(generated_ids[0])
         output = text.split("<think>")[1]
         self.assertEqual(EXPECTED_ANSWER, output)
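
For anyone who wants to reproduce the patched assertion outside the test harness, a minimal standalone sketch follows. It mirrors the values used in the diff (checkpoint id, bfloat16 load, greedy top_k=1 decoding, max_new_tokens=10, split on the <think> tag); the script itself and the assumption that the machine has enough accelerator memory to hold the A13B checkpoint in bfloat16 are not part of the patch.

# Minimal reproduction sketch of the patched integration check.
# Assumes sufficient accelerator memory for tencent/Hunyuan-A13B-Instruct in bfloat16.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "tencent/Hunyuan-A13B-Instruct", device_map="auto", dtype=torch.bfloat16
)

messages = [{"role": "user", "content": "Write a short summary of the benefits of regular exercise"}]
tokenized_chat = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

# Greedy decoding (top_k=1) over only 10 new tokens keeps the expected prefix short and stable.
generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=10, top_k=1)
text = tokenizer.decode(generated_ids[0])
print(text.split("<think>")[1])  # expected to start with "\nOkay, I need to write a"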