Add MXFP4 GGUF QuantizationType (#1677)

CISC · ngxson · web-flow · commit e841a532dcff · 2025-08-07T22:29:10.000+02:00
MXFP4 was added to llama.cpp in the GPT-OSS PR (ggml-org/llama.cpp#15091). Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
@@ -294,8 +294,8 @@ describe("gguf", () => {
 
 	// Quantization handler
 
-	it("should have GGUF_QUANT_ORDER in sync with GGMLQuantizationType enum", () => {
-		const enumValues = Object.values(GGMLQuantizationType).filter((value) => typeof value === "number") as number[];
+	it("should have GGUF_QUANT_ORDER in sync with GGMLFileQuantizationType enum", () => {
+		const enumValues = Object.values(GGMLFileQuantizationType).filter((value) => typeof value === "number") as number[];
 		const checkValues = new Set(GGUF_QUANT_ORDER);
 		for (const value of enumValues) {
 			expect(checkValues).toContain(value);
diff --git a/packages/gguf/src/quant-descriptions.ts b/packages/gguf/src/quant-descriptions.ts
@@ -132,6 +132,10 @@ export const GGUF_QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string
 		txt: "Ternary quantization.",
 		src_url: "https://github.com/ggml-org/llama.cpp/pull/8151",
 	},
+	[GGMLQuantizationType.MXFP4]: {
+		txt: "4-bit Microscaling Block Floating Point.",
+		src_url: "https://github.com/ggml-org/llama.cpp/pull/15091",
+	},
 };
 
 const QK_K = 256;
@@ -173,4 +177,5 @@ export const GGML_QUANT_SIZES = {
 	[GGMLQuantizationType.BF16]: calcBPW(1, 2),
 	[GGMLQuantizationType.TQ1_0]: calcBPW(256, 2 + 4 * 13),
 	[GGMLQuantizationType.TQ2_0]: calcBPW(256, 2 + 64),
+	[GGMLQuantizationType.MXFP4]: calcBPW(32, 1 + 16),
 };
diff --git a/packages/tasks/src/gguf.ts b/packages/tasks/src/gguf.ts
@@ -40,6 +40,7 @@ export enum GGMLFileQuantizationType {
 	Q4_0_8_8 = 35,
 	TQ1_0 = 36,
 	TQ2_0 = 37,
+	MXFP4_MOE = 38,
 
 	// custom quants used by unsloth
 	// they are not officially a scheme enum value in GGUF, but only here for naming
@@ -95,6 +96,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
 	GGMLFileQuantizationType.Q4_1,
 	GGMLFileQuantizationType.Q4_2,
 	GGMLFileQuantizationType.Q4_3,
+	GGMLFileQuantizationType.MXFP4_MOE,
 
 	// 3-bit quantizations
 	GGMLFileQuantizationType.Q3_K_XL,
@@ -197,4 +199,5 @@ export enum GGMLQuantizationType {
 	BF16 = 30,
 	TQ1_0 = 34,
 	TQ2_0 = 35,
+	MXFP4 = 39,
 }