41 | 41 | else: |
42 | 42 | ops = None |
43 | 43 |
44 | | -UNQUANTIZED_TYPES = {gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16} |
45 | | -STANDARD_QUANT_TYPES = { |
| 44 | +UNQUANTIZED_TYPES = [gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16] |
| 45 | +STANDARD_QUANT_TYPES = [ |
46 | 46 | gguf.GGMLQuantizationType.Q4_0, |
47 | 47 | gguf.GGMLQuantizationType.Q4_1, |
48 | 48 | gguf.GGMLQuantizationType.Q5_0, |
49 | 49 | gguf.GGMLQuantizationType.Q5_1, |
50 | 50 | gguf.GGMLQuantizationType.Q8_0, |
51 | 51 | gguf.GGMLQuantizationType.Q8_1, |
52 | | -} |
53 | | -KQUANT_TYPES = { |
| 52 | +] |
| 53 | +KQUANT_TYPES = [ |
54 | 54 | gguf.GGMLQuantizationType.Q2_K, |
55 | 55 | gguf.GGMLQuantizationType.Q3_K, |
56 | 56 | gguf.GGMLQuantizationType.Q4_K, |
57 | 57 | gguf.GGMLQuantizationType.Q5_K, |
58 | 58 | gguf.GGMLQuantizationType.Q6_K, |
59 | | -} |
60 | | -IMATRIX_QUANT_TYPES = { |
| 59 | +] |
| 60 | +IMATRIX_QUANT_TYPES = [ |
61 | 61 | gguf.GGMLQuantizationType.IQ1_M, |
62 | 62 | gguf.GGMLQuantizationType.IQ1_S, |
63 | 63 | gguf.GGMLQuantizationType.IQ2_XXS, |
64 | 64 | gguf.GGMLQuantizationType.IQ2_XS, |
65 | 65 | gguf.GGMLQuantizationType.IQ2_S, |
66 | 66 | gguf.GGMLQuantizationType.IQ3_XXS, |
67 | 67 | gguf.GGMLQuantizationType.IQ3_S, |
68 | 68 | gguf.GGMLQuantizationType.IQ4_XS, |
69 | 69 | gguf.GGMLQuantizationType.IQ4_NL, |
70 | | -} |
| 70 | +] |
71 | 71 | # TODO(Isotr0py): Currently, we don't have an MMQ kernel for I-Matrix quantization. |
72 | 72 | # Consolidate DEQUANT_TYPES, MMVQ_QUANT_TYPES, and MMQ_QUANT_TYPES after we add |
73 | 73 | # an MMQ kernel for I-Matrix quantization. |
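A note on the TODO: below is a minimal sketch of how the three groups it names are plausibly composed from the lists in this diff. The names `DEQUANT_TYPES`, `MMVQ_QUANT_TYPES`, and `MMQ_QUANT_TYPES` come from the comment itself, but their exact definitions are an assumption here (list concatenation is used to match this diff's set-to-list change), and `can_use_mmq` is a hypothetical helper for illustration only.

```python
# Sketch only (assumed composition, not the file's verbatim definitions):
# every quantized group can be dequantized and handled by the MMVQ kernels,
# while MMQ currently lacks an I-Matrix kernel, hence the exclusion below.
# `gguf` is already imported at the top of this module.
DEQUANT_TYPES = STANDARD_QUANT_TYPES + KQUANT_TYPES + IMATRIX_QUANT_TYPES
MMVQ_QUANT_TYPES = STANDARD_QUANT_TYPES + KQUANT_TYPES + IMATRIX_QUANT_TYPES
MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES + KQUANT_TYPES


def can_use_mmq(weight_type: gguf.GGMLQuantizationType) -> bool:
    # Membership check of the kind these groups exist to support: kernel
    # dispatch tests which group a tensor's quantization type falls into.
    return weight_type in MMQ_QUANT_TYPES
```

Once an MMQ kernel for I-Matrix quantization lands, `MMQ_QUANT_TYPES` would match the other two groups and they could be consolidated as the TODO suggests.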