We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 665aacd commit 6897c60
Copy full SHA for 6897c60
run_gguf.py
src/diffusers/quantizers/gguf/utils.py
@@ -74,7 +74,7 @@
74
# Consolidate DEQUANT_TYPES and MMQ_QUANT_TYPES after we add
75
# MMQ kernel for I-Matrix quantization.
76
DEQUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES
77
-MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES
+MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES
78
79
80
def _fused_mul_mat_gguf(x: torch.Tensor, qweight: torch.Tensor, qweight_type: int) -> torch.Tensor:
0 commit comments