45
45
LlamaForCausalLMPipe ,
46
46
Qwen2ForCausalLM ,
47
47
Qwen2ForCausalLMPipe ,
48
+ Qwen2MoeForCausalLM ,
49
+ Qwen2MoeForCausalLMPipe ,
50
+ Qwen3ForCausalLM ,
51
+ Qwen3ForCausalLMPipe ,
52
+ Qwen3MoeForCausalLM ,
53
+ Qwen3MoeForCausalLMPipe ,
48
54
)
49
55
from paddleformers .transformers .configuration_utils import LlmMetaConfig
50
56
from paddleformers .trl import DPOTrainer
51
57
from paddleformers .trl .llm_utils import get_lora_target_modules
52
58
from paddleformers .utils .log import logger
53
59
54
- flash_mask_support_list = [Qwen2ForCausalLM , Qwen2ForCausalLMPipe , LlamaForCausalLM , LlamaForCausalLMPipe ]
60
+ flash_mask_support_list = [
61
+ LlamaForCausalLM ,
62
+ LlamaForCausalLMPipe ,
63
+ Qwen2ForCausalLM ,
64
+ Qwen2ForCausalLMPipe ,
65
+ Qwen2MoeForCausalLM ,
66
+ Qwen2MoeForCausalLMPipe ,
67
+ Qwen3ForCausalLM ,
68
+ Qwen3ForCausalLMPipe ,
69
+ Qwen3MoeForCausalLM ,
70
+ Qwen3MoeForCausalLMPipe ,
71
+ ]
55
72
56
73
57
74
def main ():
@@ -123,7 +140,6 @@ def main():
123
140
model_config = AutoConfig .from_pretrained (
124
141
model_args .model_name_or_path ,
125
142
dtype = dtype ,
126
- download_hub = model_args .download_hub ,
127
143
)
128
144
model_config ._attn_implementation = model_args .attn_impl
129
145
@@ -133,7 +149,6 @@ def main():
133
149
ref_model_config = AutoConfig .from_pretrained (
134
150
model_args .model_name_or_path ,
135
151
dtype = dtype ,
136
- download_hub = model_args .download_hub ,
137
152
)
138
153
LlmMetaConfig .set_llm_config (ref_model_config , training_args )
139
154
@@ -148,7 +163,6 @@ def main():
148
163
model = model_class .from_pretrained (
149
164
model_args .model_name_or_path ,
150
165
config = model_config ,
151
- download_hub = model_args .download_hub ,
152
166
convert_from_hf = training_args .convert_from_hf ,
153
167
)
154
168
# for DPO save
@@ -170,11 +184,9 @@ def main():
170
184
raise NotImplementedError (f"{ model .__class__ } not support flash mask." )
171
185
172
186
if model_args .tokenizer_name_or_path is not None :
173
- tokenizer = AutoTokenizer .from_pretrained (
174
- model_args .tokenizer_name_or_path , download_hub = model_args .download_hub
175
- )
187
+ tokenizer = AutoTokenizer .from_pretrained (model_args .tokenizer_name_or_path )
176
188
else :
177
- tokenizer = AutoTokenizer .from_pretrained (model_args .model_name_or_path , download_hub = model_args . download_hub )
189
+ tokenizer = AutoTokenizer .from_pretrained (model_args .model_name_or_path )
178
190
179
191
logger .info ("Loading model & tokenizer successfully !" )
180
192
0 commit comments