We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c51ce60, commit 3d6e242 (Copy full SHA for 3d6e242)
fms_mo/custom_ext_kernels/utils.py
@@ -613,7 +613,9 @@ def exv2_i4f16_fxinputs_impl(
613
(x.shape[0], q4_width), dtype=torch.float16, device=x.device
614
)
615
616
- exllamav2_kernels.gemm_half_q_half(x, q_handle, output, force_cuda)
+ gptqmodel.exllamav2_kernels.gemm_half_q_half(
617
+ x, q_handle, output, force_cuda
618
+ )
619
return output.view(outshape)
620
621
# Abstract implementation
0 commit comments