Switch PARQ to new QAT API

lisjin · lisjin · commit 8cd670859b02 · 2025-08-13T07:58:31.000-07:00
diff --git a/test/prototype/test_parq.py b/test/prototype/test_parq.py
@@ -26,14 +26,13 @@
     UnifQuantizer,
     UnifTorchaoQuantizer,
 )
-from torchao.prototype.parq.quant.quant_api import StretchedIntxWeightOnlyConfig
+from torchao.prototype.parq.quant.quant_api import (
+    PARQATConfig,
+    StretchedIntxWeightOnlyConfig,
+)
 from torchao.prototype.parq.quant.uniform_torchao import _BIT_WIDTH_TO_DTYPE
 from torchao.quantization.granularity import PerGroup
-from torchao.quantization.qat import (
-    FromIntXQuantizationAwareTrainingConfig,
-    IntxFakeQuantizeConfig,
-    IntXQuantizationAwareTrainingConfig,
-)
+from torchao.quantization.qat import IntxFakeQuantizeConfig
 from torchao.quantization.quant_api import (
     Int8DynamicActivationIntxWeightConfig,
     IntxWeightOnlyConfig,
@@ -283,7 +282,7 @@ def test_intx_weight_only_parq_equivalent(self, b: int = 2, group_size: int = 32
         quantizer_ref = UnifQuantizer()
         quantizer = StretchedUnifTorchaoQuantizer(b)
 
-        for n, module in model.named_children():
+        for module in model.children():
             if not _is_linear(module):
                 continue
 
@@ -382,24 +381,31 @@ def test_int8_dynamic_activation_intx_e2e(
         optimizer.step()
 
         # apply torchao quantized activations on top
+        base_config = None
         activation_config = IntxFakeQuantizeConfig(
-            torch.int8,
-            granularity="per_token",
-            mapping_type=config.act_mapping_type,
+            torch.int8, "per_token", is_symmetric=False
         )
-        filter_fn = optimizer.get_filter_fn(model)
-        quantize_(
-            model,
-            IntXQuantizationAwareTrainingConfig(activation_config=activation_config),
-            filter_fn=filter_fn,
+        qat_config = PARQATConfig(
+            base_config,
+            activation_config=activation_config,
+            weight_config=None,
+            step="prepare",
         )
+        filter_fn = optimizer.get_filter_fn(model)
+        quantize_(model, qat_config, filter_fn=filter_fn)
         out = model(x)
         torch.testing.assert_close(out, ref_out, atol=0, rtol=0)
 
         # equivalent to torchao's convert step
         model.eval()
         optimizer.restore_latent_params()
-        quantize_(model, FromIntXQuantizationAwareTrainingConfig(), filter_fn=filter_fn)
+        qat_config = PARQATConfig(
+            base_config,
+            activation_config=activation_config,
+            weight_config=None,
+            step="convert",
+        )
+        quantize_(model, qat_config, filter_fn=filter_fn)
         quantize_(model, config, filter_fn=filter_fn)
         converted_out = model(x)
         torch.testing.assert_close(converted_out, ref_out, atol=0, rtol=0)
diff --git a/torchao/prototype/parq/optim/quantopt.py b/torchao/prototype/parq/optim/quantopt.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
+import json
 from collections import defaultdict
 from collections.abc import Callable
 from functools import partial
@@ -14,7 +15,7 @@
 from torch.optim import Optimizer
 
 from ..quant import Quantizer
-from ..utils import HAS_DTENSOR, is_dtensor
+from ..utils import HAS_DTENSOR, instantiate_module, is_dtensor
 from .proxmap import ProxMap
 
 if HAS_DTENSOR:
@@ -172,9 +173,7 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]
         for group in self.regularized_param_groups():
             # Override quantizer if specified in the group
             if "quant_cls" in group:
-                quant_cls = instantiate_module(
-                    f"{parq.__name__}.quant", group["quant_cls"]
-                )
+                quant_cls = instantiate_module("..quant", group["quant_cls"])
                 quant_kwargs = (
                     json.loads(group["quant_kwargs"]) if "quant_kwargs" in group else {}
                 )
@@ -201,9 +200,9 @@ def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]
 
                 # reshape p according to block size if specified
                 if block_size is not None:
-                    assert (
-                        p.size(-1) % block_size == 0
-                    ), f"{p.size(-1)=} is not divisible by {block_size=}"
+                    assert p.size(-1) % block_size == 0, (
+                        f"{p.size(-1)=} is not divisible by {block_size=}"
+                    )
                     assert p.dim() <= 2, f"Invalid {p.dim()=} for {block_size=}"
                     if p.dim() == 1:
                         p = p.unsqueeze(0)
diff --git a/torchao/prototype/parq/quant/quant_api.py b/torchao/prototype/parq/quant/quant_api.py
@@ -12,6 +12,13 @@
 
 from torchao.dtypes import AffineQuantizedTensor, Layout, QDQLayout
 from torchao.quantization.granularity import PerAxis, PerGroup
+from torchao.quantization.qat import (
+    FakeQuantizedEmbedding,
+    FakeQuantizedLinear,
+    QATConfig,
+    QATStep,
+)
+from torchao.quantization.qat.api import _qat_config_transform
 from torchao.quantization.quant_api import IntxWeightOnlyConfig
 from torchao.quantization.quant_primitives import (
     _SUB_BYTE_UINT_BOUNDS,
@@ -219,3 +226,31 @@ def _stretched_intx_weight_only_transform(
     )
     module.weight = torch.nn.Parameter(weight, requires_grad=False)
     return module
+
+
+@dataclass
+class PARQATConfig(QATConfig):
+    """`QATConfig` that ignores the missing `base_config`/`weight_config` ValueError."""
+
+    def __post_init__(self):
+        try:
+            super().__post_init__()
+        except ValueError as err:
+            # Only this one parent check is waived; any other ValueError propagates.
+            if str(err) != "One of `base_config` or `weight_config` must be specified":
+                raise err
+
+
+@register_quantize_module_handler(PARQATConfig)
+def _parq_config_transform(module: nn.Module, config: PARQATConfig) -> nn.Module:
+    """Prepare via `_qat_config_transform`; convert via `to_linear`/`to_embedding`."""
+    step = config.step
+    if step == QATStep.PREPARE:
+        return _qat_config_transform(module, config)
+    if step != QATStep.CONVERT:
+        raise ValueError(f"unexpected {step=} in QATConfig")
+    if isinstance(module, FakeQuantizedLinear):
+        return module.to_linear()
+    if isinstance(module, FakeQuantizedEmbedding):
+        return module.to_embedding()
+    return module  # convert leaves every other module type untouched
diff --git a/torchao/prototype/parq/utils.py b/torchao/prototype/parq/utils.py
@@ -4,6 +4,8 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
+from importlib import import_module
+
 import torch
 from torch import Tensor
 
@@ -15,6 +17,10 @@
     HAS_DTENSOR = False
 
 
+def instantiate_module(module_path, module_suffix, package=None):
+    return getattr(import_module(module_path, package), module_suffix)  # package anchors relative paths
+
+
 def is_dtensor(x):
     return HAS_DTENSOR and isinstance(x, DTensor)