From 782a80d81774002a5a95ccda68974386b1c9b34a Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 3 Jul 2025 14:39:49 +0000 Subject: [PATCH 1/9] add deepseek_v3 awq mapping Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modifiers/awq/mappings.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 693406ec3..7794e6425 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -116,6 +116,21 @@ class AWQMapping: ), ] +# DeepseekV3 +_deepseek_mappings = [ + AWQMapping( + "re:.*input_layernorm$", + ["re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + ), + AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), + AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), + AWQMapping( + "re:.*post_attention_layernorm$", + ["re:.*gate_proj$", "re:.*up_proj$"], + ), + AWQMapping("re:.*up_proj$", ["re:.*down_proj$"]), +] + AWQ_MAPPING_REGISTRY: Dict[str, list[AWQMapping]] = { "CohereForCausalLM": _cohere_mappings, "Cohere2ForCausalLM": _cohere_mappings, @@ -131,6 +146,7 @@ class AWQMapping: "Qwen2MoeForCausalLM": _moe_default_mappings, "Qwen3ForCausalLM": _default_mappings, "Qwen3MoeForCausalLM": _moe_default_mappings, + "DeepseekV3ForCausalLM": _deepseek_mappings, } From 17cd57de8ada2e8957432c38ba4e2cad273d17ca Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:04:38 +0000 Subject: [PATCH 2/9] sort `AWQ_MAPPING_REGISTRY` in alphabetical order Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 7794e6425..ca6313163 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -134,6 +134,7 @@ class AWQMapping: AWQ_MAPPING_REGISTRY: Dict[str, list[AWQMapping]] = { "CohereForCausalLM": _cohere_mappings, "Cohere2ForCausalLM": _cohere_mappings, + "DeepseekV3ForCausalLM": _deepseek_mappings, "Gemma2ForCausalLM": _gemma_mappings, "Gemma3ForCausalLM": _gemma_mappings, "Gemma3ForConditionalGeneration": _gemma_mappings, @@ -146,7 +147,6 @@ class AWQMapping: "Qwen2MoeForCausalLM": _moe_default_mappings, "Qwen3ForCausalLM": _default_mappings, "Qwen3MoeForCausalLM": _moe_default_mappings, - "DeepseekV3ForCausalLM": _deepseek_mappings, } From 178dcfe848b817d671a60bedc4376192f405ad87 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 13:08:16 -0600 Subject: [PATCH 3/9] include q_proj in mapping Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index ca6313163..55e9aaf26 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -120,7 +120,8 @@ class AWQMapping: _deepseek_mappings = [ AWQMapping( "re:.*input_layernorm$", - ["re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + # Some models use q_proj + ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), From 8e8340c62a916c987c63b8a37a038b6ef69c62af Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 13:09:26 -0600 Subject: [PATCH 4/9] comment update Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 55e9aaf26..ea8155136 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -120,7 +120,7 @@ class AWQMapping: _deepseek_mappings = [ AWQMapping( "re:.*input_layernorm$", - # Some models use q_proj + # Some models use q_proj instead of q_a_proj ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), From c6ef4b20bd242e3fef0bfe32ff8ab976e224b61f Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 14:20:10 -0600 Subject: [PATCH 5/9] fix OR regex mapping Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index ea8155136..743f14e85 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -121,7 +121,7 @@ class AWQMapping: AWQMapping( "re:.*input_layernorm$", # Some models use q_proj instead of q_a_proj - ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + ["re:.*(q|q_a)_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), From dee73717f497ae0903d7c34a0cd3077978dfe71f Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Wed, 16 Jul 2025 02:19:28 +0900 Subject: [PATCH 6/9] decrease memory when calculating w_mean Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py index 7d5c3671f..6e533cc1a 100644 --- a/src/llmcompressor/modifiers/awq/base.py +++ b/src/llmcompressor/modifiers/awq/base.py @@ -465,11 +465,13 @@ def _apply_smoothing(self, model: Module) -> None: # Calculates the relative magnitude of the weights within # each of the quantization groups, and rescales each group # individually so that each group has weights on a 0-1 scale. - w_scale = weight.abs() / (weight.abs().amax(dim=1, keepdim=True) + 1e-6) + weight.abs_() + weight.div_(weight.amax(dim=1, keepdim=True) + 1e-6) # Resizes the rescaled weight matrix back up to its original dimensions - w_scale = w_scale.view(org_shape) + weight = weight.view(org_shape) # Gets the average rescaled magnitude for each output channel - w_mean = w_scale.mean(0) + w_mean = weight.mean(0) + del weight with calibration_forward_context(model), HooksMixin.disable_hooks(): # [STEP 3]: Compute output of module From d0e9cbb8c99a97397c7ab93c48f8a7deef730518 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Wed, 16 Jul 2025 16:00:07 +0000 Subject: [PATCH 7/9] bump `transformers>=4.52` to import `Llama4Config` from `transformers.models` Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 88fa55223..45ec66e1a 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str: "tqdm>=4.0.0", # torch 1.10 and 1.11 do not support quantized onnx export "torch>=1.7.0,!=1.10,!=1.11", - "transformers>4.0", + "transformers>=4.52.0", "datasets", "accelerate>=0.20.3,!=1.1.0", "pynvml", From 6e852ed413c19b1c0c4d8e01e43b0a10609e3a72 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 17 Jul 2025 09:33:43 +0900 Subject: [PATCH 8/9] Update setup.py Co-authored-by: Brian Dellabetta Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 45ec66e1a..88fa55223 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str: "tqdm>=4.0.0", # torch 1.10 and 1.11 do not support quantized onnx export "torch>=1.7.0,!=1.10,!=1.11", - "transformers>=4.52.0", + "transformers>4.0", "datasets", "accelerate>=0.20.3,!=1.1.0", "pynvml", From a736ba0e3b7830e580a4b439a274a41ebf1f6619 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 17 Jul 2025 00:38:36 +0000 Subject: [PATCH 9/9] move `Llama4Config` importation path for compatibility with wider transformers versions Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modeling/llama4.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/llmcompressor/modeling/llama4.py b/src/llmcompressor/modeling/llama4.py index 1d98ca57b..a33833ea1 100644 --- a/src/llmcompressor/modeling/llama4.py +++ b/src/llmcompressor/modeling/llama4.py @@ -1,8 +1,10 @@ from typing import Tuple import torch -from transformers.models import Llama4Config -from transformers.models.llama4.configuration_llama4 import Llama4TextConfig +from transformers.models.llama4.configuration_llama4 import ( + Llama4Config, + Llama4TextConfig, +) from transformers.models.llama4.modeling_llama4 import ( Llama4TextExperts, Llama4TextMLP,