Skip to content

Commit 50720e0

Browse files
authored
Merge branch 'main' into move-gptq-to-modifiers
2 parents 5c4ed3c + 778abe8 commit 50720e0

File tree

7 files changed

+42
-103
lines changed

7 files changed

+42
-103
lines changed

.MAINTAINERS

Lines changed: 0 additions & 17 deletions
This file was deleted.

examples/finetuning/configure_fsdp.md

Lines changed: 0 additions & 15 deletions
This file was deleted.

examples/finetuning/example_alternating_recipe.yaml

Lines changed: 0 additions & 32 deletions
This file was deleted.

examples/finetuning/example_fsdp_config.yaml

Lines changed: 0 additions & 24 deletions
This file was deleted.

examples/finetuning/example_single_gpu_config.yaml

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/llmcompressor/modifiers/awq/mappings.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,30 @@ class AWQMapping:
     ),
 ]
 
+# AFMOE uses dual normalization: pre_mlp_layernorm feeds the MLP
+# (not post_attention_layernorm) and attention has its own gate_proj
+# for gating mechanism
+_afmoe_mappings = [
+    AWQMapping(
+        "re:.*input_layernorm$",
+        [
+            "re:.*self_attn.q_proj$",
+            "re:.*self_attn.k_proj$",
+            "re:.*self_attn.v_proj$",
+            "re:.*self_attn.gate_proj$",
+        ],
+    ),
+    AWQMapping("re:.*v_proj$", ["re:.*o_proj$"]),
+    AWQMapping(
+        "re:.*pre_mlp_layernorm$",
+        ["re:.*mlp.*gate_proj$", "re:.*mlp.*up_proj$"],
+    ),
+    AWQMapping(
+        "re:.*up_proj$",
+        ["re:.*down_proj$"],
+    ),
+]
+
 # Example mapping for MoE models with parallel transformer blocks, where
 # attention and MoE share the same input. This is the only case where
 # activation_hook_target is needed. Without it, the hook lands on
@@ -217,6 +241,7 @@ class AWQMapping:
 ]
 
 AWQ_MAPPING_REGISTRY: dict[str, list[AWQMapping]] = {
+    "AfmoeForCausalLM": _afmoe_mappings,
     "BloomForCausalLM": _bloom_mappings,
     "CohereForCausalLM": _cohere_mappings,
     "Cohere2ForCausalLM": _cohere_mappings,

src/llmcompressor/modifiers/transform/smoothquant/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,22 @@
     ),
 ]
 
+AFMOE_SMOOTHQUANT_MAPPINGS: list[LayerMap] = [
+    LayerMap(
+        balance_layers=[
+            "re:.*self_attn\\.q_proj",
+            "re:.*self_attn\\.k_proj",
+            "re:.*self_attn\\.v_proj",
+            "re:.*self_attn\\.gate_proj",
+        ],
+        smooth_layers="re:.*input_layernorm",
+    ),
+    LayerMap(
+        balance_layers=["re:.*mlp.*gate_proj", "re:.*mlp.*up_proj"],
+        smooth_layers="re:.*pre_mlp_layernorm",
+    ),
+]
+
 # Registry of layer mappings for different architectures
 # Add more mappings here
@@ -85,6 +101,7 @@
     "Qwen2ForCausalLM": DEFAULT_SMOOTHQUANT_MAPPINGS,
     "Qwen3ForCausalLM": DEFAULT_SMOOTHQUANT_MAPPINGS,
     "WhisperForConditionalGeneration": WHISPER_V2_SMOOTHQUANT_MAPPINGS,
+    "AfmoeForCausalLM": AFMOE_SMOOTHQUANT_MAPPINGS,
 }

0 commit comments

Comments (0)