
Commit 653e542

committed
added test_model.py
1 parent b45e9d1 commit 653e542

File tree

1 file changed: +127 -0 lines changed


test/test_kernel/test_model.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
import shutil
import sys
import unittest

import pytest

sys.path.insert(0, "../..")

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from auto_round import AutoRound, AutoRoundConfig
from auto_round.eval.evaluation import simple_evaluate_user_model
from auto_round.testing_utils import require_autogptq, require_gptqmodel


class LLMDataLoader:
    """Minimal calibration dataloader that yields dummy token ids."""

    def __init__(self):
        self.batch_size = 1

    def __iter__(self):
        for i in range(2):
            yield torch.ones([1, 10], dtype=torch.long)


class TestAutoRoundTorchBackend(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.model_name = "facebook/opt-125m"
        cls.save_folder = "./saved"
        cls.llm_dataloader = LLMDataLoader()

    def model_infer(self, model, tokenizer):
        prompts = [
            "Hello, my name is",
            # "The president of the United States is",
            # "The capital of France is",
            # "The future of AI is",
        ]

        inputs = tokenizer(prompts, return_tensors="pt", padding=False, truncation=True)

        outputs = model.generate(
            input_ids=inputs["input_ids"].to(model.device),
            attention_mask=inputs["attention_mask"].to(model.device),
            do_sample=False,  # change this to follow official usage
            max_new_tokens=5,
        )
        # Strip the prompt tokens so only the newly generated tokens are decoded.
        generated_ids = [output_ids[len(input_ids) :] for input_ids, output_ids in zip(inputs["input_ids"], outputs)]

        decoded_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

        for i, prompt in enumerate(prompts):
            print(f"Prompt: {prompt}")
            print(f"Generated: {decoded_outputs[i]}")
            print("-" * 50)
        return decoded_outputs[0]

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(cls.save_folder, ignore_errors=True)
        shutil.rmtree("runs", ignore_errors=True)

    def test_torch_4bits_sym_cpu(self):
        model = AutoModelForCausalLM.from_pretrained(self.model_name, dtype="auto", trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
        bits, group_size, sym = 4, 32, True
        autoround = AutoRound(
            model,
            tokenizer,
            bits=bits,
            group_size=group_size,
            sym=sym,
            iters=0,  # iters=0: skip the tuning iterations
            seqlen=2,
            dataset=self.llm_dataloader,
        )
        quantized_model_path = self.save_folder
        autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round:gptqmodel")

        quantization_config = AutoRoundConfig(backend="ark")
        model = AutoModelForCausalLM.from_pretrained(
            quantized_model_path, dtype=torch.float16, device_map="cpu", quantization_config=quantization_config
        )

        tokenizer = AutoTokenizer.from_pretrained(self.save_folder)
        self.model_infer(model, tokenizer)
        result = simple_evaluate_user_model(model, tokenizer, batch_size=32, tasks="lambada_openai", limit=1000)
        print(result["results"]["lambada_openai"]["acc,none"])
        self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.28)

        shutil.rmtree(self.save_folder, ignore_errors=True)

    def test_torch_4bits_sym_xpu(self):
        model = AutoModelForCausalLM.from_pretrained(self.model_name, dtype="auto", trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
        bits, group_size, sym = 4, 32, True
        autoround = AutoRound(
            model,
            tokenizer,
            bits=bits,
            group_size=group_size,
            sym=sym,
            iters=0,
            seqlen=2,
            dataset=self.llm_dataloader,
        )
        quantized_model_path = self.save_folder
        autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round")  # will convert to a GPTQ model

        quantization_config = AutoRoundConfig(backend="ark")
        model = AutoModelForCausalLM.from_pretrained(
            quantized_model_path, dtype=torch.float16, device_map="xpu", quantization_config=quantization_config
        )

        tokenizer = AutoTokenizer.from_pretrained(self.save_folder)
        self.model_infer(model, tokenizer)
        result = simple_evaluate_user_model(model, tokenizer, batch_size=32, tasks="lambada_openai", limit=1000)
        print(result["results"]["lambada_openai"]["acc,none"])
        self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.28)
        torch.xpu.empty_cache()
        shutil.rmtree(self.save_folder, ignore_errors=True)


if __name__ == "__main__":
    unittest.main()
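
For anyone running the new file locally, a minimal sketch of invoking it through pytest; the relative path and the "-q" flag here are assumptions about the local checkout, not part of this commit:

import pytest

# Run only the new test file; "-q" keeps the report terse.
# Path assumed relative to the repository root.
raise SystemExit(pytest.main(["-q", "test/test_kernel/test_model.py"]))

Equivalently, `python test/test_kernel/test_model.py` works, since the file ends with unittest.main().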

0 commit comments
