
Commit cf6df46

Move activation-only config to PARQ prototype
1 parent 701bc31 commit cf6df46


5 files changed: +41 -41 lines changed


test/prototype/test_parq.py

Lines changed: 5 additions & 3 deletions
@@ -26,12 +26,14 @@
     UnifQuantizer,
     UnifTorchaoQuantizer,
 )
-from torchao.prototype.parq.quant.quant_api import StretchedIntxWeightOnlyConfig
+from torchao.prototype.parq.quant.quant_api import (
+    Int8DynamicActivationOnlyConfig,
+    StretchedIntxWeightOnlyConfig,
+)
 from torchao.prototype.parq.quant.uniform_torchao import _BIT_WIDTH_TO_DTYPE
 from torchao.quantization.granularity import PerGroup
 from torchao.quantization.qat import QATConfig
 from torchao.quantization.quant_api import (
-    Int8DynActOnlyConfig,
     Int8DynamicActivationIntxWeightConfig,
     IntxWeightOnlyConfig,
     _is_linear,
@@ -392,7 +394,7 @@ def test_int8_dynamic_activation_intx_e2e(
         optimizer.step()

         # apply torchao quantized activations on top
-        qat_config = QATConfig(Int8DynActOnlyConfig(), step="prepare")
+        qat_config = QATConfig(Int8DynamicActivationOnlyConfig(), step="prepare")
         filter_fn = optimizer.get_filter_fn(model)
         quantize_(model, qat_config, filter_fn=filter_fn)
         out = model(x)
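
For context, the flow this test exercises can be sketched as below. This is a minimal illustration, assuming torchao's public quantize_ and QATConfig entry points; the toy model, tensor shapes, and lambda filter are placeholders, not taken from the test.

import torch
from torchao.quantization import quantize_
from torchao.quantization.qat import QATConfig
from torchao.prototype.parq.quant.quant_api import Int8DynamicActivationOnlyConfig

# Toy stand-in for the optimizer-managed model in the test.
model = torch.nn.Sequential(torch.nn.Linear(128, 128))

# Prepare step: fake-quantize activations (int8, dynamic, per-token) on linear
# modules; weights are left untouched by this activation-only config.
qat_config = QATConfig(Int8DynamicActivationOnlyConfig(), step="prepare")
quantize_(model, qat_config, filter_fn=lambda mod, fqn: isinstance(mod, torch.nn.Linear))

out = model(torch.randn(4, 128))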

torchao/prototype/parq/quant/quant_api.py

Lines changed: 33 additions & 5 deletions
@@ -10,19 +10,47 @@
 import torch
 from torch import nn

+from torchao.core.config import AOBaseConfig
 from torchao.dtypes import AffineQuantizedTensor, Layout, QDQLayout
-from torchao.quantization.granularity import PerAxis, PerGroup
-from torchao.quantization.quant_api import IntxWeightOnlyConfig
-from torchao.quantization.quant_primitives import (
-    _SUB_BYTE_UINT_BOUNDS,
+from torchao.quantization import (
     MappingType,
+    PerAxis,
+    PerGroup,
     ZeroPointDomain,
-    _get_reduction_params,
     dequantize_affine,
+    to_linear_activation_quantized,
+)
+from torchao.quantization.quant_api import (
+    IntxWeightOnlyConfig,
+    _int8_asymm_per_token_quant,
+    _int8_symm_per_token_reduced_range_quant,
+)
+from torchao.quantization.quant_primitives import (
+    _SUB_BYTE_UINT_BOUNDS,
+    _get_reduction_params,
 )
 from torchao.quantization.transform_module import register_quantize_module_handler


+@dataclass
+class Int8DynamicActivationOnlyConfig(AOBaseConfig):
+    is_symmetric: bool = False
+
+
+@register_quantize_module_handler(Int8DynamicActivationOnlyConfig)
+def _int8_dynamic_activation_transform(
+    module: torch.nn.Module, config: Int8DynamicActivationOnlyConfig
+) -> torch.nn.Module:
+    weight = module.weight
+    if config.is_symmetric:
+        input_quant_func = _int8_symm_per_token_reduced_range_quant
+    else:
+        input_quant_func = _int8_asymm_per_token_quant
+    weight = to_linear_activation_quantized(weight, input_quant_func)
+    module.weight = torch.nn.Parameter(weight, requires_grad=False)
+    return module
+
+
 def choose_qparams_stretched_affine(
     input_float: torch.Tensor,
     mapping_type: MappingType,
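
A minimal post-training usage sketch of the relocated config, assuming torchao's quantize_ entry point; the model and shapes below are illustrative only.

import torch
from torchao.quantization import quantize_
from torchao.prototype.parq.quant.quant_api import Int8DynamicActivationOnlyConfig

model = torch.nn.Sequential(torch.nn.Linear(64, 64)).eval()

# Default is asymmetric int8 dynamic per-token activation quantization;
# is_symmetric=True selects the symmetric reduced-range variant instead.
quantize_(model, Int8DynamicActivationOnlyConfig())

out = model(torch.randn(2, 64))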

torchao/quantization/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -54,7 +54,6 @@
     GemliteUIntXWeightOnlyConfig,
     Int4DynamicActivationInt4WeightConfig,
     Int4WeightOnlyConfig,
-    Int8DynActOnlyConfig,
     Int8DynamicActivationInt4WeightConfig,
     Int8DynamicActivationInt8WeightConfig,
     Int8DynamicActivationIntxWeightConfig,
@@ -145,7 +144,6 @@
     "Int8DynamicActivationIntxWeightConfig",
     "Int4WeightOnlyConfig",
     "Float8DynamicActivationInt4WeightConfig",
-    "Int8DynActOnlyConfig",
     "Int8WeightOnlyConfig",
     "Float8WeightOnlyConfig",
     "Float8DynamicActivationFloat8WeightConfig",

torchao/quantization/qat/fake_quantize_config.py

Lines changed: 2 additions & 2 deletions
@@ -290,9 +290,9 @@ def _infer_fake_quantize_configs(
     Return a 2-tuple of (activation_config, weight_config) for fake quantization.
     """
     # avoid circular imports
+    from torchao.prototype.parq.quant.quant_api import Int8DynamicActivationOnlyConfig
     from torchao.quantization import (
         Int4WeightOnlyConfig,
-        Int8DynActOnlyConfig,
         Int8DynamicActivationInt4WeightConfig,
     )

@@ -316,7 +316,7 @@ def _infer_fake_quantize_configs(
             zero_point_domain=base_config.zero_point_domain,
         )
         return (None, weight_config)
-    elif isinstance(base_config, Int8DynActOnlyConfig):
+    elif isinstance(base_config, Int8DynamicActivationOnlyConfig):
         act_config = IntxFakeQuantizeConfig(
             dtype=torch.int8,
             granularity="per_token",
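
For reference, a hedged sketch of the (activation_config, weight_config) pair this new branch produces; the IntxFakeQuantizeConfig arguments beyond the two visible in the diff are truncated above and are left at their defaults here, which is an assumption.

import torch
from torchao.quantization.qat.fake_quantize_config import IntxFakeQuantizeConfig

# Activations: int8, dynamic, per-token fake quantization, matching the diff.
act_config = IntxFakeQuantizeConfig(dtype=torch.int8, granularity="per_token")
# Weights: none -- the config quantizes activations only.
weight_config = None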

torchao/quantization/quant_api.py

Lines changed: 1 addition & 29 deletions
@@ -26,9 +26,7 @@
 import torch.nn.utils.parametrize as parametrize

 import torchao
-from torchao.core.config import (
-    AOBaseConfig,
-)
+from torchao.core.config import AOBaseConfig
 from torchao.dtypes import (
     AffineQuantizedTensor,
     CutlassInt4PackedLayout,
@@ -148,7+146,6 @@
     "gemlite_uintx_weight_only",
     "float8_dynamic_activation_float8_weight",
     "float8_static_activation_float8_weight",
-    "Int8DynActOnlyConfig",
     "Int8DynActInt4WeightQuantizer",
     "Float8DynamicActivationFloat8SemiSparseWeightConfig",
     "ModuleFqnToConfig",
@@ -1313,31 +1310,6 @@ def _float8_cutlass_quant_sparse(
     )


-@dataclass
-class Int8DynActOnlyConfig(AOBaseConfig):
-    """
-    Configuration for applying int8 dynamic symmetric per-token activation quantization to linear layers.
-    Args:
-        is_symmetric: bool = False - Whether to use symmetric quantization for activations.
-    """
-
-    is_symmetric: bool = False
-
-
-@register_quantize_module_handler(Int8DynActOnlyConfig)
-def _int8_dynamic_activation_transform(
-    module: torch.nn.Module, config: Int8DynActOnlyConfig
-) -> torch.nn.Module:
-    weight = module.weight
-    if config.is_symmetric:
-        input_quant_func = _int8_symm_per_token_reduced_range_quant
-    else:
-        input_quant_func = _int8_asymm_per_token_quant
-    weight = to_linear_activation_quantized(weight, input_quant_func)
-    module.weight = torch.nn.Parameter(weight, requires_grad=False)
-    return module
-
-
 @dataclass
 class Int8DynamicActivationInt8WeightConfig(AOBaseConfig):
     """
