Commit 31ab716 (1 parent: a1a4fcd)
src/transformers/models/llama4/configuration_llama4.py
@@ -251,9 +251,6 @@ class Llama4TextConfig(PretrainedConfig):
   "layers.*.self_attn.k_proj": "colwise",
   "layers.*.self_attn.v_proj": "colwise",
   "layers.*.self_attn.o_proj": "rowwise",
-  "layers.*.input_layernorm.weight": "sequence_parallel",
-  "layers.*.post_attention_layernorm.weight": "sequence_parallel",
-  "norm.weight": "sequence_parallel",
   "layers.*.feed_forward.shared_expert.gate_proj": "local_colwise",
   "layers.*.feed_forward.shared_expert.up_proj": "local_colwise",
   "layers.*.feed_forward.shared_expert.down_proj": "local_rowwise",