
Commit 3927ffe

[testing] reduce runtime of HunYuanMoEV1IntegrationTest:test_model_generation (huggingface#41373)
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <[email protected]>
1 parent 7164924 commit 3927ffe

File tree

1 file changed

+6
-3
lines changed


tests/models/hunyuan_v1_moe/test_modeling_hunyuan_v1_moe.py

Lines changed: 6 additions & 3 deletions
@@ -16,6 +16,7 @@
 import unittest
 
 import pytest
+import torch
 from parameterized import parameterized
 
 from transformers import is_torch_available
@@ -99,10 +100,12 @@ def tearDown(self):
     def test_model_generation(self):
         # we will compele this when model file change over
         # pass
-        EXPECTED_ANSWER = "\nOkay, I need to write a short summary about the benefits of regular exercise. Let me start by recalling what I know. First,"
+        EXPECTED_ANSWER = "\nOkay, I need to write a"
         prompt = "Write a short summary of the benefits of regular exercise"
         tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct")
-        model = AutoModelForCausalLM.from_pretrained("tencent/Hunyuan-A13B-Instruct", device_map="auto")
+        model = AutoModelForCausalLM.from_pretrained(
+            "tencent/Hunyuan-A13B-Instruct", device_map="auto", dtype=torch.bfloat16
+        )
         messages = [
             {"role": "user", "content": prompt},
         ]
@@ -112,7 +115,7 @@ def test_model_generation(self):
             add_generation_prompt=True,
             return_tensors="pt",
         )
-        generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=30, top_k=1)
+        generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=10, top_k=1)
         text = tokenizer.decode(generated_ids[0])
         output = text.split("<think>")[1]
         self.assertEqual(EXPECTED_ANSWER, output)
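For reference, below is a minimal standalone sketch of the speedup pattern this commit applies: load the checkpoint in bfloat16 and cap max_new_tokens so the greedy decode only produces the short prefix being checked. The model id, prompt, and generate arguments mirror the test; the final print stands in for the test's assertion, and running it assumes enough accelerator memory for the A13B checkpoint.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tencent/Hunyuan-A13B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# bfloat16 roughly halves memory and bandwidth versus a float32 load,
# which is where most of the runtime saving comes from
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", dtype=torch.bfloat16
)

messages = [
    {"role": "user", "content": "Write a short summary of the benefits of regular exercise"},
]
tokenized_chat = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# top_k=1 keeps decoding greedy and deterministic; 10 new tokens are enough
# to check the expected prefix, so the test no longer pays for 30
generated_ids = model.generate(tokenized_chat.to(model.device), max_new_tokens=10, top_k=1)
print(tokenizer.decode(generated_ids[0]))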
