From ae8849e00ea2b4115f451d840f1aae4549a54702 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 27 Aug 2025 19:49:43 +0000 Subject: [PATCH 1/4] add format --- src/compressed_tensors/quantization/quant_scheme.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index a9c8b45a2..84ce68948 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -142,6 +142,16 @@ def is_preset_scheme(name: str) -> bool: ) ) +MXFP4 = dict( + weights=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.GROUP, + symmetric=True, + dynamic=False, + group_size=32 + ) +) NVFP4 = dict( weights=QuantizationArgs( From 3150cea797fae6a31f0b969bac5525761c36b143 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 17:20:11 +0000 Subject: [PATCH 2/4] update --- .../quantization/quant_scheme.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index 84ce68948..67707d54f 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -148,9 +148,17 @@ def is_preset_scheme(name: str) -> bool: type=QuantizationType.FLOAT, strategy=QuantizationStrategy.GROUP, symmetric=True, - dynamic=False, - group_size=32 - ) + dynamic=False, + group_size=32, + ), + input_activations=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.GROUP, + dynamic=True, + symmetric=True, + group_size=32, + ), ) NVFP4 = dict( From 4c117505cb9e2a723290beb2ee25fd44a030d8d9 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 17:36:55 +0000 Subject: [PATCH 3/4] update --- .../quantization/quant_scheme.py | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index 67707d54f..cc6748f96 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -142,44 +142,57 @@ def is_preset_scheme(name: str) -> bool: ) ) -MXFP4 = dict( + +NVFP4 = dict( weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.GROUP, + strategy=QuantizationStrategy.TENSOR_GROUP, symmetric=True, dynamic=False, - group_size=32, + group_size=16, ), input_activations=QuantizationArgs( + num_bits=4, + type=QuantizationType.FLOAT, + strategy=QuantizationStrategy.TENSOR_GROUP, + symmetric=True, + dynamic=DynamicType.LOCAL, + group_size=16, + ), +) + +MXFP4A16 = dict( + weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, strategy=QuantizationStrategy.GROUP, - dynamic=True, symmetric=True, + dynamic=False, group_size=32, - ), + ) ) -NVFP4 = dict( +MXFP4 = dict( weights=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.TENSOR_GROUP, + strategy=QuantizationStrategy.GROUP, symmetric=True, dynamic=False, - group_size=16, + group_size=32, ), input_activations=QuantizationArgs( num_bits=4, type=QuantizationType.FLOAT, - strategy=QuantizationStrategy.TENSOR_GROUP, + strategy=QuantizationStrategy.GROUP, + dynamic=True, symmetric=True, - dynamic=DynamicType.LOCAL, - group_size=16, + group_size=32, ), ) + # 8 bit integer weights and 8 bit activations quantization INT8_W8A8 = dict( weights=QuantizationArgs( From 7e4c47db2917469a7a3550dafa1c838454dd2ca2 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 28 Aug 2025 19:56:45 +0000 Subject: [PATCH 4/4] update --- src/compressed_tensors/quantization/quant_scheme.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compressed_tensors/quantization/quant_scheme.py b/src/compressed_tensors/quantization/quant_scheme.py index cc6748f96..9bc45a438 100644 --- a/src/compressed_tensors/quantization/quant_scheme.py +++ b/src/compressed_tensors/quantization/quant_scheme.py @@ -344,4 +344,6 @@ def is_preset_scheme(name: str) -> bool: "FP8_BLOCK": FP8_BLOCK, "NVFP4A16": NVFP4A16, "NVFP4": NVFP4, + "MXFP4": MXFP4, + "MXFP4A16": MXFP4A16, }