Commit a9eafcd

Merge pull request #156 from BrandonGroth/gptq_warning
fix: Remove gptqmodel Warning on startup
2 parents 7167028 + 4c8e88c commit a9eafcd

File tree: 3 files changed, +56 -47 lines

  fms_mo/custom_ext_kernels/utils.py
  fms_mo/modules/linear.py
  fms_mo/utils/custom_gptq_models.py

fms_mo/custom_ext_kernels/utils.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -517,8 +517,8 @@ def exllama_ops_load_and_reg(qcfg=None, run_unit_test=False):
         need_registration = False
     else:
         need_registration = (
-            available_packages["exllama_kernels"]
-            and available_packages["exllamav2_kernels"]
+            available_packages["gptqmodel_exllama_kernels"]
+            and available_packages["gptqmodel_exllamav2_kernels"]
         )
 
     if not need_registration:
```
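
This hunk only renames the lookup keys: the exllama CUDA kernels now ship under the gptqmodel namespace, so registration is gated on `gptqmodel_exllama_kernels` and `gptqmodel_exllamav2_kernels` instead of the old standalone package names. For context, a registry like `available_packages` (the real one lives in `fms_mo.utils.import_utils`) can answer this question without importing anything, which is what keeps warnings from firing at startup. A minimal sketch under that assumption; `PackageAvailability` is a hypothetical illustration, not the project's implementation:

```python
# A minimal sketch, assuming available_packages behaves like a lazy lookup of
# installed packages. The real implementation is in fms_mo.utils.import_utils
# and may differ (e.g. caching or extras handling).
import importlib.util


class PackageAvailability(dict):
    """Dict-like probe that reports whether a package can be imported
    without actually importing it, so no import-time warnings fire."""

    def __missing__(self, name: str) -> bool:
        found = importlib.util.find_spec(name) is not None
        self[name] = found  # cache the result of the probe
        return found


available_packages = PackageAvailability()

# Same check as the updated hunk: the exllama kernels are now looked up
# under their gptqmodel-namespaced package names.
need_registration = (
    available_packages["gptqmodel_exllama_kernels"]
    and available_packages["gptqmodel_exllamav2_kernels"]
)
```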

fms_mo/modules/linear.py

Lines changed: 15 additions & 7 deletions
```diff
@@ -1583,7 +1583,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x.to(in_dtype)
 
 
-try:
+gptq_available = (
+    available_packages["gptqmodel"]
+    and available_packages["gptqmodel_exllama_kernels"]
+    and available_packages["gptqmodel_exllamav2_kernels"]
+)
+
+if gptq_available:
     # Third Party
     from gptqmodel.nn_modules.qlinear.exllama import (
         ExllamaQuantLinear as QLinearExllamaV1,
@@ -1882,12 +1888,6 @@ def forward(self, x, force_cuda=False):
             x.add_(self.bias)
         return x
 
-except ModuleNotFoundError:
-    logger.warning(
-        "GPTQModel is not properly installed. "
-        "QLinearExv1WI4AF16 and QLinearExv2WI4AF16 wrappers will not be available."
-    )
-
 
 class LinearFuncFPxFwdBwd(torch.autograd.Function):
     """Linear function using FP24 accumulation, experimental only.
@@ -2355,6 +2355,14 @@ def extra_repr(self) -> str:
 if available_packages["mx"]:
     QLinear_modules += (QLinearMX,)
 
+if gptq_available:
+    QLinear_modules += (
+        QLinearExllamaV1,
+        QLinearExllamaV2,
+        QLinearExv1WI4AF16,
+        QLinearExv2WI4AF16,
+    )
+
 
 def isinstance_qlinear(module):
     """
```

fms_mo/utils/custom_gptq_models.py

Lines changed: 39 additions & 38 deletions
```diff
@@ -14,41 +14,42 @@
 
 """Allow users to add new GPTQ classes for their custom models easily."""
 
-# Third Party
-from gptqmodel.models.base import BaseGPTQModel
-
-
-class GraniteGPTQForCausalLM(BaseGPTQModel):
-    """Enable Granite for GPTQ."""
-
-    layer_type = "GraniteDecoderLayer"
-    layers_node = "model.layers"
-    base_modules = ["model.embed_tokens", "model.norm"]
-    layer_modules = [
-        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
-        ["self_attn.o_proj"],
-        ["mlp.up_proj", "mlp.gate_proj"],
-        ["mlp.down_proj"],
-    ]
-
-
-class GraniteMoeGPTQForCausalLM(BaseGPTQModel):
-    """Enable Granite MOE for GPTQ."""
-
-    layer_type = "GraniteMoeDecoderLayer"
-    layers_node = "model.layers"
-    base_modules = ["model.embed_tokens", "model.norm"]
-    layer_modules = [
-        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
-        ["self_attn.o_proj"],
-        ["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"],
-    ]
-
-
-# NOTE: Keys in this table are huggingface config."model_type" (see the corresponding field in
-# config.json). Make sure you cover the ones in the model family you want to use, as they may
-# not be under the same model_type. See Granite as an example.
-custom_gptq_classes = {
-    # "granite": GraniteGPTQForCausalLM,
-    "granitemoe": GraniteMoeGPTQForCausalLM,
-}
+# Local
+from fms_mo.utils.import_utils import available_packages
+
+if available_packages["gptqmodel"]:
+    # Third Party
+    from gptqmodel.models.base import BaseGPTQModel
+
+    class GraniteGPTQForCausalLM(BaseGPTQModel):
+        """Enable Granite for GPTQ."""
+
+        layer_type = "GraniteDecoderLayer"
+        layers_node = "model.layers"
+        base_modules = ["model.embed_tokens", "model.norm"]
+        layer_modules = [
+            ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+            ["self_attn.o_proj"],
+            ["mlp.up_proj", "mlp.gate_proj"],
+            ["mlp.down_proj"],
+        ]
+
+    class GraniteMoeGPTQForCausalLM(BaseGPTQModel):
+        """Enable Granite MOE for GPTQ."""
+
+        layer_type = "GraniteMoeDecoderLayer"
+        layers_node = "model.layers"
+        base_modules = ["model.embed_tokens", "model.norm"]
+        layer_modules = [
+            ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+            ["self_attn.o_proj"],
+            ["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"],
+        ]
+
+    # NOTE: Keys in this table are huggingface config."model_type" (see the corresponding field in
+    # config.json). Make sure you cover the ones in the model family you want to use,
+    # as they may not be under the same model_type. See Granite as an example.
+    custom_gptq_classes = {
+        # "granite": GraniteGPTQForCausalLM,
+        "granitemoe": GraniteMoeGPTQForCausalLM,
+    }
```
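
The entire module body now lives under `if available_packages["gptqmodel"]:`, so `custom_gptq_classes` and the Granite wrapper classes are only defined when gptqmodel is installed and the unconditional third-party import, the source of the startup warning, is gone. A hedged sketch of how such a `model_type`-keyed table might be consulted; `pick_gptq_class` and the `AutoConfig` lookup are assumptions for illustration, not code from this repository:

```python
# Hedged usage sketch: looking up a custom GPTQ wrapper by the huggingface
# config.model_type. The import below only works when gptqmodel is installed,
# since otherwise the table is never defined.
from transformers import AutoConfig

from fms_mo.utils.custom_gptq_models import custom_gptq_classes


def pick_gptq_class(model_name_or_path: str):
    """Return the custom GPTQ wrapper registered for this model_type, or None."""
    config = AutoConfig.from_pretrained(model_name_or_path)
    # Keys are huggingface config.model_type values, e.g. "granitemoe".
    return custom_gptq_classes.get(config.model_type)
```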
