
Commit 4f230b1

Update syntax of custom torch ops
Signed-off-by: Andrea Fasoli <[email protected]>
1 parent 2c75fcc
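This commit migrates both AIU custom ops from the torch.library.define / torch.library.impl / torch.library.impl_abstract registration pattern to the decorator-based torch.library.custom_op and torch.library.register_fake APIs, which infer each op's schema from Python type annotations; the torch dependency floor is raised to 2.4 to match.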

File tree

3 files changed: +34 -38 lines changed

fms_mo/aiu_addons/gptq/gptq_aiu_op.py

Lines changed: 22 additions & 10 deletions
@@ -36,17 +36,27 @@ def register_aiu_gptq_op():
     ):
         logger.warning("AIU op has already been registered")
         return
-
     op_namespace_id = "gptq_gemm::i4f16_fxinputs_aiu"
-    torch.library.define(
-        op_namespace_id,
-        "(Tensor x, Tensor qw, Tensor qzeros, Tensor scales, Tensor g_idx) -> Tensor",
-    )

     # Add implementations for the operator
-    @torch.library.impl(op_namespace_id, "default")
-    def i4f16_fxinputs_aiu(x, qw, qzeros, scales, g_idx):
-        # on AIU, GPTQ qw is [out_feat, in_feat]
+    @torch.library.custom_op(op_namespace_id, mutates_args=())
+    def i4f16_fxinputs_aiu(
+        x: torch.Tensor,
+        qw: torch.Tensor,
+        qzeros: torch.Tensor,
+        scales: torch.Tensor,
+        g_idx: torch.Tensor,
+    ) -> torch.Tensor:
+        """Implement fake processing of GPTQ W4A16 matmul. The purpose is to create a
+        node on the computational graph to be captured during compiling for AIU.
+
+        Instead of computing the weight decompression and matmul, this function returns
+        a zero tensor with the expected shape.
+
+        NOTE: on AIU, GPTQ qw is [out_feat, in_feat], while AutoGPTQ saves the quantized
+        weights as [in_feat, out_feat]
+        """
+
         outshape = x.shape[:-1] + (qw.shape[0],)
         x = x.view(-1, x.shape[-1])
         output = torch.zeros(
@@ -56,8 +66,10 @@ def i4f16_fxinputs_aiu(x, qw, qzeros, scales, g_idx):
         )
         return output.view(outshape)

-    @torch.library.impl_abstract(op_namespace_id)
-    def i4f16_fxinputs_aiu_abstract(x, qw, qzeros, scales, g_idx):
+    @torch.library.register_fake(op_namespace_id)
+    def _(x, qw, qzeros, scales, g_idx):
+        """OP template of I/O sizes"""
+
         outshape = x.shape[:-1] + (qw.shape[0],)
         return torch.empty(
             outshape,
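
For readers unfamiliar with the new API, the following is a minimal, self-contained sketch of the registration pattern adopted above. It assumes torch>=2.4; the mylib::zeros_mm namespace and its placeholder semantics are illustrative only and not part of fms_mo.

import torch

# The op schema is inferred from the type annotations, so the separate
# torch.library.define() call from the old pattern is no longer needed.
@torch.library.custom_op("mylib::zeros_mm", mutates_args=())
def zeros_mm(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
    # Eager implementation: return a zero tensor with the matmul output
    # shape, mirroring the placeholder behavior of the AIU op above.
    outshape = x.shape[:-1] + (w.shape[0],)
    return torch.zeros(outshape, dtype=x.dtype, device=x.device)

# Replaces the deprecated @torch.library.impl_abstract: describes output
# shape/dtype to the tracer without running the eager kernel.
@torch.library.register_fake("mylib::zeros_mm")
def _(x, w):
    outshape = x.shape[:-1] + (w.shape[0],)
    return torch.empty(outshape, dtype=x.dtype, device=x.device)

Once registered, the op is reachable as torch.ops.mylib.zeros_mm(x, w), just as the ops in this commit are exposed under torch.ops.gptq_gemm and torch.ops.fms_mo.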

fms_mo/aiu_addons/i8i8/i8i8_aiu_op.py

Lines changed: 11 additions & 27 deletions
@@ -41,26 +41,18 @@ def register_aiu_i8i8_op():
     if hasattr(torch.ops, "fms_mo") and hasattr(torch.ops.fms_mo, "i8i8_aiu"):
         logger.warning("AIU op has already been registered")
         return
-
     op_namespace_id = "fms_mo::i8i8_aiu"
-    torch.library.define(
-        op_namespace_id,
-        "(Tensor x, Tensor weight, Tensor bias, Tensor qdata, "
-        "str weight_quant_type, str activ_quant_type, "
-        "bool smoothquant) "
-        "-> Tensor",
-    )

-    @torch.library.impl(op_namespace_id, "default")
+    @torch.library.custom_op(op_namespace_id, mutates_args=())
     def i8i8_aiu(
-        x,
-        weight,
-        bias,
-        qdata,
-        weight_quant_type,
-        activ_quant_type,
-        smoothquant,
-    ):
+        x: torch.Tensor,
+        weight: torch.Tensor,
+        bias: torch.Tensor,
+        qdata: torch.Tensor,
+        weight_quant_type: str,
+        activ_quant_type: str,
+        smoothquant: bool,
+    ) -> torch.Tensor:
         """Implement addmm of X and W.
         Support various quantization options for weights and activations.

@@ -86,16 +78,8 @@ def i8i8_aiu(

         return F.linear(x_dq.to(dtype), w_dq.to(dtype), bias.to(dtype))

-    @torch.library.impl_abstract(op_namespace_id)
-    def i8i8_aiu_abstract(
-        x,
-        weight,
-        bias,
-        qdata,
-        weight_quant_type,
-        activ_quant_type,
-        smoothquant,
-    ):
+    @torch.library.register_fake(op_namespace_id)
+    def _(x, weight, bias, qdata, weight_quant_type, activ_quant_type, smoothquant):
         """OP template of I/O sizes"""

         outshape = x.size()[:-1] + (weight.size(0),)
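
A quick way to see what register_fake buys: under fake-tensor tracing, the mechanism torch.compile uses to capture graphs, only the fake implementation runs, so output shapes propagate without executing the eager kernel. A sketch reusing the hypothetical mylib::zeros_mm op from above; note that FakeTensorMode lives in a private torch module:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode  # private API

x = torch.randn(2, 8)
w = torch.randn(4, 8)
out = torch.ops.mylib.zeros_mm(x, w)  # eager path: zeros of shape (2, 4)

# Under fake-tensor tracing only the register_fake implementation runs,
# propagating shapes and dtypes through the graph.
with FakeTensorMode():
    fx = torch.empty(2, 8)
    fw = torch.empty(4, 8)
    fout = torch.ops.mylib.zeros_mm(fx, fw)
    assert fout.shape == (2, 4)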

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ dependencies = [
     "numpy>=1.26.4,<2.3.0",
     "accelerate>=0.20.3,!=0.34,<1.7",
     "transformers>=4.45,<4.51",
-    "torch>=2.2.0,<2.6",
+    "torch>=2.4,<2.6",
     "triton>=3.0,<3.2",
     "tqdm>=4.66.2,<5.0",
     "datasets>=3.0.0,<4.0",

0 commit comments
