huggingface · ngxson · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
@@ -132,6 +132,10 @@ export const GGUF_QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string
 		txt: "Ternary quantization.",
 		src_url: "https://github.com/ggml-org/llama.cpp/pull/8151",
 	},
+	[GGMLQuantizationType.MXFP4]: {
+		txt: "4-bit Microscaling Block Floating Point.",
+		src_url: "https://github.com/ggml-org/llama.cpp/pull/15091",
+	},
 };
 
 const QK_K = 256;
@@ -173,4 +177,5 @@ export const GGML_QUANT_SIZES = {
 	[GGMLQuantizationType.BF16]: calcBPW(1, 2),
 	[GGMLQuantizationType.TQ1_0]: calcBPW(256, 2 + 4 * 13),
 	[GGMLQuantizationType.TQ2_0]: calcBPW(256, 2 + 64),
+	[GGMLQuantizationType.MXFP4]: calcBPW(32, 1 + 16),
 };
@@ -40,6 +40,7 @@ export enum GGMLFileQuantizationType {
 	Q4_0_8_8 = 35,
 	TQ1_0 = 36,
 	TQ2_0 = 37,
+	MXFP4_MOE = 38,
 
 	// custom quants used by unsloth
 	// these are not official scheme enum values in GGUF; they are listed here only for naming
@@ -95,6 +96,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
 	GGMLFileQuantizationType.Q4_1,
 	GGMLFileQuantizationType.Q4_2,
 	GGMLFileQuantizationType.Q4_3,
+	GGMLFileQuantizationType.MXFP4_MOE,
 
 	// 3-bit quantizations
 	GGMLFileQuantizationType.Q3_K_XL,
@@ -197,4 +199,5 @@ export enum GGMLQuantizationType {
 	BF16 = 30,
 	TQ1_0 = 34,
 	TQ2_0 = 35,
+	MXFP4 = 39,
 }