Bug fix: call gemm_half_q_half via gptqmodel.exllamav2_kernels in exv2_i4f16_fxinputs_impl

chichun-charlie-liu · chichun-charlie-liu · commit 3d6e242f0e31 · 2025-06-02T17:34:37.000Z
Signed-off-by: cliu-us <cliu@us.ibm.com>
diff --git a/fms_mo/custom_ext_kernels/utils.py b/fms_mo/custom_ext_kernels/utils.py
@@ -613,7 +613,9 @@ def exv2_i4f16_fxinputs_impl(
                 (x.shape[0], q4_width), dtype=torch.float16, device=x.device
             )
 
-            exllamav2_kernels.gemm_half_q_half(x, q_handle, output, force_cuda)
+            gptqmodel.exllamav2_kernels.gemm_half_q_half(
+                x, q_handle, output, force_cuda
+            )
             return output.view(outshape)
 
         # Abstract implementation

Original file line number	Diff line number	Diff line change
`@@ -613,7 +613,9 @@ def exv2_i4f16_fxinputs_impl(`
`613`	`613`	`(x.shape[0], q4_width), dtype=torch.float16, device=x.device`
`614`	`614`	`)`
`615`	`615`
`616`		`- exllamav2_kernels.gemm_half_q_half(x, q_handle, output, force_cuda)`
	`616`	`+ gptqmodel.exllamav2_kernels.gemm_half_q_half(`
	`617`	`+ x, q_handle, output, force_cuda`
	`618`	`+ )`
`617`	`619`	`return output.view(outshape)`
`618`	`620`
`619`	`621`	`# Abstract implementation`