Skip to content

Commit ea3691e

Browse files
authored
Update Int4WeightOnlyConfig VERSION argument (#2754)
Update Int4WeightOnlyConfig argument VERSION. Summary: This was missed in previous PRs; we want to use `version` instead of `VERSION`. Test Plan: python test/quantization/quantize_/workflows/int4/test_int4_tensor.py ; python test/quantization/quantize_/workflows/int4/test_int4_preshuffled_tensor.py . Reviewers: Subscribers: Tasks: Tags:
1 parent 21ceb8e commit ea3691e

File tree

3 files changed

+7
-7
lines changed

3 files changed

+7
-7
lines changed

test/quantization/quantize_/workflows/int4/test_int4_preshuffled_tensor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
BF16_ACT_CONFIG = Int4WeightOnlyConfig(
3131
group_size=128,
3232
packing_format="preshuffled",
33-
VERSION=2,
33+
version=2,
3434
)
3535

3636
FP8_ACT_CONFIG = Float8DynamicActivationInt4WeightConfig(

test/quantization/quantize_/workflows/int4/test_int4_tensor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def setUp(self):
2727
self.config = Int4WeightOnlyConfig(
2828
group_size=128,
2929
packing_format="plain",
30-
VERSION=2,
30+
version=2,
3131
)
3232
self.GPU_DEVICES = ["cuda"] if torch.cuda.is_available() else []
3333

torchao/quantization/quant_api.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,7 @@ class Int4WeightOnlyConfig(AOBaseConfig):
10131013
`zero_point_domain`: data type of zeros points, choices are [ZeroPointDomain.FLOAT, ZeroPointDomain.INT, ZeroPointDomain.NONE]
10141014
`set_inductor_config`: if True, adjusts `torchinductor` settings to recommended values.
10151015
`preserve_zero`: whether to preserve zero, default is None. Will be set to True if zero_point_domain is ZeroPointDomain.INT
1016-
`packing_format`: the packing format for int4 tensor, available from VERSION 2 and above
1016+
`packing_format`: the packing format for int4 tensor, available from version 2 and above
10171017
"""
10181018

10191019
group_size: int = 128
@@ -1022,9 +1022,9 @@ class Int4WeightOnlyConfig(AOBaseConfig):
10221022
zero_point_domain: Optional[ZeroPointDomain] = ZeroPointDomain.NONE
10231023
set_inductor_config: bool = True
10241024
preserve_zero: Optional[bool] = None
1025-
# only used in VERSION >= 2
1025+
# only used in version >= 2
10261026
packing_format: PackingFormat = PackingFormat.PLAIN
1027-
VERSION: int = 1
1027+
version: int = 1
10281028

10291029
def __post_init__(self):
10301030
torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig")
@@ -1055,7 +1055,7 @@ def _int4_weight_only_quantize_tensor(weight, config):
10551055

10561056
block_size = tuple([1 for _ in range(weight.ndim - 1)] + [group_size])
10571057

1058-
if config.VERSION == 2:
1058+
if config.version == 2:
10591059
block_size = list(block_size)
10601060
if packing_format == PackingFormat.PRESHUFFLED:
10611061
new_weight = Int4PreshuffledTensor.from_hp(
@@ -1073,7 +1073,7 @@ def _int4_weight_only_quantize_tensor(weight, config):
10731073
else:
10741074
raise ValueError(f"Unsupported packing format: {packing_format}")
10751075

1076-
assert config.VERSION == 1
1076+
assert config.version == 1
10771077

10781078
mapping_type = MappingType.ASYMMETRIC
10791079
target_dtype = torch.int32

0 commit comments

Comments
 (0)