Skip to content

Commit aa1cc09

Browse files
authored
fix machete pre quant (#4295)
1 parent 7b6cb72 commit aa1cc09

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

fastdeploy/model_executor/layers/linear.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ def __init__(
129129
self.with_bias = with_bias
130130
self.add_bias = add_bias
131131
self.prefix = prefix
132+
self.is_quantized = fd_config.model_config.is_quantized
132133
# key
133134
if weight_key:
134135
self.weight_key = f"{prefix}.{weight_key}"

fastdeploy/model_executor/layers/quantization/weight_only.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import paddle
2222
from paddle.nn.quant import weight_quantize
23+
from paddleformers.utils.log import logger
2324

2425
from fastdeploy import envs
2526
from fastdeploy.model_executor.layers.linear import (
@@ -159,9 +160,11 @@ def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
159160
if (
160161
_ENABLE_MACHETE
161162
and envs.FD_USE_MACHETE == "1"
163+
and not layer.is_quantized
162164
and layer.weight_shape[1]
163165
and layer.weight_shape[1] % 128 == 0
164166
):
167+
logger.info("Using Machete kernel for WeightOnlyLinearMethod")
165168
return MacheteWeightOnlyLinearMethod(self)
166169
return GPUWeightOnlyLinearMethod(self)
167170

@@ -399,7 +402,7 @@ def __init__(
399402
super().__init__(quant_config)
400403

401404
def process_prequanted_weights(self, layer, state_dict) -> None:
402-
pass
405+
raise NotImplementedError("Machete kernel doesn't support prequant. Please set FD_USE_MACHETE to 0.")
403406

404407
def process_loaded_weights(self, layer, weight) -> None:
405408
from fastdeploy.model_executor.layers.quantization.ops import (

0 commit comments

Comments (0)