Commit c51ce60 (parent: 2be858a)

gptqmodel has renamed its compiled extension packages: exllama_kernels is now gptqmodel_exllama_kernels, and exllamav2_kernels is now gptqmodel_exllamav2_kernels. Update all references accordingly.

4 files changed: +11 / -9 lines
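Because only the importable module names changed, an environment pinned to an older gptqmodel still ships the pre-rename names. A minimal compatibility shim for code that must span both versions — this helper is illustrative and is not part of the commit:

# Illustrative shim, not part of this commit: prefer the renamed packages
# and fall back to the legacy names for older gptqmodel installs.
import importlib

def _first_importable(new_name: str, old_name: str):
    """Return the renamed module if present, else the legacy one."""
    try:
        return importlib.import_module(new_name)
    except ImportError:
        return importlib.import_module(old_name)

exllama_kernels = _first_importable(
    "gptqmodel_exllama_kernels", "exllama_kernels"
)
exllamav2_kernels = _first_importable(
    "gptqmodel_exllamav2_kernels", "exllamav2_kernels"
)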

.pylintrc

Lines changed: 2 additions & 2 deletions

@@ -64,8 +64,8 @@ ignore-patterns=^\.#
 # and thus existing member attributes cannot be deduced by static analysis). It
 # supports qualified module names, as well as Unix pattern matching.
 ignored-modules=gptqmodel,
-                exllama_kernels,
-                exllamav2_kernels,
+                gptqmodel_exllama_kernels,
+                gptqmodel_exllamav2_kernels,
                 llmcompressor,
                 cutlass_mm,
                 pygraphviz,

fms_mo/custom_ext_kernels/utils.py

Lines changed: 6 additions & 4 deletions

@@ -529,8 +529,8 @@ def exllama_ops_load_and_reg(qcfg=None, run_unit_test=False):
         return

     # Third Party
-    import exllama_kernels
-    import exllamav2_kernels
+    import gptqmodel_exllama_kernels
+    import gptqmodel_exllamav2_kernels

     # Register op
     @reg_op(f"{namespace}::exv1_i4f16")
@@ -547,7 +547,7 @@ def exv1_i4f16_impl(x, q4, q4_width):
             (x.shape[0], q4_width), dtype=torch.float16, device=x.device
         )

-        exllama_kernels.q4_matmul(x, q4, output)
+        gptqmodel_exllama_kernels.q4_matmul(x, q4, output)
         return output.view(outshape)

     # Abstract implementation
@@ -575,7 +575,9 @@ def exv2_i4f16_impl(x, q_handle, q4_width, force_cuda):
             (x.shape[0], q4_width), dtype=torch.float16, device=x.device
        )

-        exllamav2_kernels.gemm_half_q_half(x, q_handle, output, force_cuda)
+        gptqmodel_exllamav2_kernels.gemm_half_q_half(
+            x, q_handle, output, force_cuda
+        )
         return output.view(outshape)

     # Abstract implementation
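Here the renamed modules are consumed through custom-op wrappers registered with the repo's own reg_op helper, which this diff does not show. A sketch of an equivalent registration using PyTorch 2.4+'s torch.library.custom_op as a stand-in — the "fms_mo" namespace, the fake-kernel wiring, and the argument types are assumptions; only the q4_matmul call shape comes from the diff:

# Sketch only: reg_op is the repo's own helper (not shown in this diff);
# torch.library.custom_op is used here as a stand-in for the same pattern.
import torch
import gptqmodel_exllama_kernels  # assumes the renamed kernels are installed

@torch.library.custom_op("fms_mo::exv1_i4f16", mutates_args=())
def exv1_i4f16(x: torch.Tensor, q4: int, q4_width: int) -> torch.Tensor:
    # Flatten leading dims, run the int4 x fp16 matmul, restore the shape.
    outshape = x.shape[:-1] + (q4_width,)
    x = x.view(-1, x.shape[-1])
    output = torch.empty(
        (x.shape[0], q4_width), dtype=torch.float16, device=x.device
    )
    gptqmodel_exllama_kernels.q4_matmul(x, q4, output)
    return output.view(outshape)

# Fake (meta) implementation so tracing can infer shapes without
# touching the real CUDA kernel.
@exv1_i4f16.register_fake
def _(x, q4, q4_width):
    return x.new_empty(x.shape[:-1] + (q4_width,), dtype=torch.float16)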

fms_mo/modules/linear.py

Lines changed: 1 addition & 1 deletion

@@ -1501,14 +1501,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

 try:
     # Third Party
-    from exllama_kernels import prepare_buffers, set_tuning_params
     from gptqmodel.nn_modules.qlinear.exllama import (
         ExllamaQuantLinear as QLinearExllamaV1,
     )
     from gptqmodel.nn_modules.qlinear.exllamav2 import (
         ExllamaV2QuantLinear as QLinearExllamaV2,
     )
     from gptqmodel.nn_modules.qlinear.exllamav2 import ext_gemm_half_q_half
+    from gptqmodel_exllama_kernels import prepare_buffers, set_tuning_params
     from transformers.pytorch_utils import Conv1D

 class QLinearExv1WI4AF16(QLinearExllamaV1):
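Only the import source of prepare_buffers and set_tuning_params changes here; the deletion and addition are the same import re-sorted to its new alphabetical position. These are the exllama v1 setup entry points, and a sketch of how they are typically invoked, following the upstream exllama integration (buffer shapes and tuning values below are illustrative placeholders, not values from this repo):

# Illustrative setup for the exllama v1 kernels under the new package name.
# Buffer shapes and tuning values are placeholders, not taken from fms_mo.
import torch
from gptqmodel_exllama_kernels import prepare_buffers, set_tuning_params

device = torch.device("cuda:0")
max_tokens = 2048   # assumed maximum tokens per forward pass
max_width = 4096    # assumed widest hidden dimension in the model

# Scratch buffers shared by every exllama v1 linear layer on this device.
temp_state = torch.zeros(
    (max_tokens, max_width), dtype=torch.float16, device=device
)
temp_dq = torch.zeros(
    (1, max_width * max_width), dtype=torch.float16, device=device
)
prepare_buffers(device, temp_state, temp_dq)

# Arguments: matmul_recons_thd, matmul_fused_remap, matmul_no_half2.
set_tuning_params(8, False, False)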

fms_mo/utils/import_utils.py

Lines changed: 2 additions & 2 deletions

@@ -22,8 +22,8 @@

 optional_packages = [
     "gptqmodel",
-    "exllama_kernels",
-    "exllamav2_kernels",
+    "gptqmodel_exllama_kernels",
+    "gptqmodel_exllamav2_kernels",
     "llmcompressor",
     "mx",
     "matplotlib",
