Skip to content

Commit 1a792b1

Browse files
author
wangzaijun
committed
fix awq marlin quant
1 parent 99dfd53 commit 1a792b1

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

lightllm/common/quantization/awq_quant.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from lightllm.utils.vllm_utils import HAS_VLLM, vllm_ops, cutlass_scaled_mm
77
from lightllm.utils.light_utils import HAS_LIGHTLLM_KERNEL, light_ops
88
from typing import Any
9-
from typing import TYPE_CHECKING, Optional
9+
from typing import TYPE_CHECKING, Optional, Tuple
10+
from lightllm.utils.dist_utils import get_current_device_id
1011

1112
if TYPE_CHECKING:
1213
from lightllm.common.basemodel.layer_weights.meta_weights.mm_weight.mm_weight import MMWeightPack
@@ -118,6 +119,27 @@ def method_name(self):
118119
def quantize(self, weight: torch.Tensor):
    """
    Online quantization entry point; not available for AWQ.

    Args:
        weight: The tensor that would be quantized. It is never read here.

    Raises:
        NotImplementedError: Always raised by this implementation.
    """
    raise NotImplementedError("AWQ online quantization is not supported yet.")
120121

122+
def params_need_repack(self) -> bool:
    """
    Report whether the quantized weights need a repack step after quantization.

    Per the original note, only AWQ currently supports repacking.

    Returns:
        Always ``True`` for this quantization method.
    """
    return True
127+
128+
def params_repack(
    self, weight: torch.Tensor, weight_scale: torch.Tensor, weight_zero_point: torch.Tensor, dtype_type: torch.dtype
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Repack already-quantized parameters.

    Some quantization methods (AWQ among them) rearrange their parameters after
    quantization so that the operator reaches its best performance.

    Each tensor is moved to the current CUDA device and passed through its matching
    ``_process_*_after_loading`` hook. The scale is cast to ``dtype_type`` before its hook runs.

    Args:
        weight: Quantized weight tensor.
        weight_scale: Scale tensor paired with ``weight``.
        weight_zero_point: Zero-point tensor paired with ``weight``.
        dtype_type: Dtype the scale is converted to before processing.

    Returns:
        A ``(weight, weight_scale, weight_zero_point)`` tuple of the processed tensors.
    """
    # Look the target device up once rather than once per tensor.
    device_id = get_current_device_id()

    # The weight hook runs first, as in the original ordering.
    # NOTE(review): it records shape info on self (e.g. self.k); confirm whether the later hooks depend on that.
    weight = self._process_weight_after_loading(weight.cuda(device_id))
    weight_scale = self._process_weight_scale_after_loading(weight_scale.cuda(device_id).to(dtype_type))
    weight_zero_point = self._process_weight_zero_point_after_loading(weight_zero_point.cuda(device_id))
    return weight, weight_scale, weight_zero_point
142+
121143
def _process_weight_after_loading(self, weight: torch.Tensor) -> torch.Tensor:
122144
assert self.hf_quantization_config is not None, "hf_quantization_config is not set"
123145
self.k = weight.shape[0]

0 commit comments

Comments
 (0)