We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 323adac, commit b6eb9e8 (Copy full SHA for b6eb9e8)
1 file changed
swift/megatron/utils/config.py
@@ -110,8 +110,8 @@ def convert_hf_config(config) -> Dict[str, Any]:
110
interleave_moe_layer_step = res.pop('interleave_moe_layer_step', None)
111
window_size = res.pop('window_size', None)
112
if llm_architectures in {'Qwen3ForCausalLM', 'Qwen3MoeForCausalLM', 'Qwen3NextForCausalLM'} or architectures in {
113
- 'Qwen3OmniMoeForConditionalGeneration', 'Qwen3VLForConditionalGeneration',
114
- 'Qwen3VLMoeForConditionalGeneration'
+ 'Qwen3OmniMoeForConditionalGeneration', 'Qwen3OmniForConditionalGeneration',
+ 'Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration'
115
}:
116
res['qk_layernorm'] = True
117
if llm_architectures in {'Qwen2MoeForCausalLM', 'Qwen3MoeForCausalLM', 'Qwen3NextForCausalLM'} or architectures in {
0 commit comments