Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/gguf/src/quant-descriptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ export const GGUF_QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string
txt: "Ternary quantization.",
src_url: "https://github.com/ggml-org/llama.cpp/pull/8151",
},
[GGMLQuantizationType.MXFP4]: {
txt: "4-bit Microscaling Block Floating Point.",
src_url: "https://github.com/ggml-org/llama.cpp/pull/15091",
},
};

const QK_K = 256;
Expand Down Expand Up @@ -173,4 +177,5 @@ export const GGML_QUANT_SIZES = {
[GGMLQuantizationType.BF16]: calcBPW(1, 2),
[GGMLQuantizationType.TQ1_0]: calcBPW(256, 2 + 4 * 13),
[GGMLQuantizationType.TQ2_0]: calcBPW(256, 2 + 64),
[GGMLQuantizationType.MXFP4]: calcBPW(32, 1 + 16),
};
3 changes: 3 additions & 0 deletions packages/tasks/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export enum GGMLFileQuantizationType {
Q4_0_8_8 = 35,
TQ1_0 = 36,
TQ2_0 = 37,
MXFP4_MOE = 38,

// custom quants used by unsloth
// they are not official scheme enum values in GGUF; they are listed here only for naming
Expand Down Expand Up @@ -95,6 +96,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
GGMLFileQuantizationType.Q4_1,
GGMLFileQuantizationType.Q4_2,
GGMLFileQuantizationType.Q4_3,
GGMLFileQuantizationType.MXFP4_MOE,

// 3-bit quantizations
GGMLFileQuantizationType.Q3_K_XL,
Expand Down Expand Up @@ -197,4 +199,5 @@ export enum GGMLQuantizationType {
BF16 = 30,
TQ1_0 = 34,
TQ2_0 = 35,
MXFP4 = 39,
}
Loading