
Commit ded584f
[New Sample] Add test example to extract paddlenlp models and add 7 samples. (#249)
* Add test example to extract paddle nlp samples.
* Update test codes.
* Add gpt2-medium-en.
* Update test code.
* Add hash file for gpt2.
* Add llama-7b.
* Add bert-base.
* Add ernie-1.0.
* Add ernie-3.0.
* Add nezha-base-chinese.
* Add roformer.
1 parent 7685b09 commit ded584f

37 files changed: +50732 −0 lines changed

graph_net/test/nlp_model_getter.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
def get_auto_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

    config = AutoConfig.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_config(config, dtype=dtype)
    model = model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    inputs = tokenizer(
        text, return_tensors="pd", padding=True, truncation=True, max_length=2048
    )
    return model, inputs


def get_bert_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import BertModel, BertTokenizer

    model = BertModel.from_pretrained(model_name)
    model.eval()

    tokenizer = BertTokenizer.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_convbert_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import ConvBertModel as ModelClass
    from paddlenlp.transformers import ConvBertTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    model.eval()

    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_ernie_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import ErnieModel, ErnieTokenizer

    model = ErnieModel.from_pretrained(model_name)
    tokenizer = ErnieTokenizer.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_ernie_m_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import ErnieMModel as ModelClass
    from paddlenlp.transformers import ErnieMTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    model.eval()

    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_gpt_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import GPTModel, GPTTokenizer

    model = GPTModel.from_pretrained(model_name)
    model.eval()

    tokenizer = GPTTokenizer.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    inputs.pop("token_type_ids")
    return model, inputs


def get_nezha_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import NeZhaModel as ModelClass
    from paddlenlp.transformers import NeZhaTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_ppminilm_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import PPMiniLMModel as ModelClass
    from paddlenlp.transformers import PPMiniLMTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_reformer_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import RoFormerModel as ModelClass
    from paddlenlp.transformers import RoFormerTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs


def get_skep_model_and_inputs(model_name, text, dtype):
    from paddlenlp.transformers import SkepModel as ModelClass
    from paddlenlp.transformers import SkepTokenizer as TokenizerClass

    model = ModelClass.from_pretrained(model_name)
    tokenizer = TokenizerClass.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pd")
    return model, inputs
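For reference, a minimal usage sketch of these getters (not part of this commit): the model names, prompt text, and "float32" dtype below are illustrative assumptions, and it assumes the file above is importable as graph_net.test.nlp_model_getter.

# Usage sketch; model names, prompt, and dtype are assumptions for illustration.
from graph_net.test.nlp_model_getter import (
    get_auto_model_and_inputs,
    get_bert_model_and_inputs,
)

# Encoder-style getter: loads pretrained weights via from_pretrained().
model, inputs = get_bert_model_and_inputs(
    "bert-base-uncased", "Hello, PaddleNLP!", "float32"
)
outputs = model(**inputs)

# Causal-LM getter: builds the model from its config only (random weights),
# with padding/truncation enabled for longer prompts.
model, inputs = get_auto_model_and_inputs(
    "gpt2-medium-en", "Hello, PaddleNLP!", "float32"
)
# Pass only input_ids here, since some tokenizers also emit fields
# (e.g. token_type_ids) that a causal-LM forward may not accept.
outputs = model(input_ids=inputs["input_ids"])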
