
Commit 0d762ef

Add INT8 basic tests (wip)
Signed-off-by: Andrea Fasoli <[email protected]>
1 parent c30dadf commit 0d762ef

7 files changed, +216 −42 lines changed
File renamed without changes.
Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@
 import torch.nn as nn

 # Local
-from fms_mo.aiu_addons.int8.int8_aiu_op import register_aiu_i8i8_op
+from fms_mo.aiu_addons.i8i8.i8i8_aiu_op import register_aiu_i8i8_op

 register_aiu_i8i8_op()

tests/aiu_addons/conftest.py

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
# Copyright The FMS Model Optimizer Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pytest configuration file with fixtures for add-ons functionality testing"""

# Third Party
import pytest
import torch

# ================================================
# GPTQ W4A16 fixtures
# ================================================

gptq_input_sizes = [
    {
        "bs": 4,
        "seq_len": 5,
        "hid_dim": 256,
        "out_feat": 512,
        "n_grp": 4,
    },
]


@pytest.fixture(scope="session", params=gptq_input_sizes)
def get_gptq_gemm_inputs(request) -> tuple[torch.Tensor, ...]:
    """pytest fixture returning test inputs for GPTQ op"""

    sizes = request.param
    compression_factor = 8  # assume 4-bits compression

    x = torch.randn(
        (sizes["bs"], sizes["seq_len"], sizes["hid_dim"]), dtype=torch.float16
    )
    qweight = torch.randint(
        low=0,
        high=torch.iinfo(torch.int32).max,
        size=(sizes["out_feat"], sizes["hid_dim"] // compression_factor),
        dtype=torch.int32,
    )
    qzeros = 8 * torch.ones(
        (sizes["n_grp"], sizes["out_feat"] // compression_factor),
        dtype=torch.int32,
    )
    scales = torch.randn(
        (sizes["n_grp"], sizes["out_feat"]),
        dtype=torch.float16,
    )
    g_idx = torch.zeros(sizes["hid_dim"], dtype=torch.int32)

    return (x, qweight, qzeros, scales, g_idx)


# ================================================
# INT8xINT8 fixtures
# ================================================

i8i8_metadata = [
    {
        "bs": 4,
        "seq_len": 7,
        "hid_dim": 256,
        "out_feat": 512,
        "dtype": torch.float16,
        "wtype": "per_tensor",  # per_channel
        "atype": "per_tensor_symm",  # per_tensor_asymm, per_token
        "smoothquant": False,
    }
]


@pytest.fixture(scope="session", params=i8i8_metadata)
def get_i8i8_gemm_inputs(
    request,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, str, str, bool]:
    """pytest fixture returning test inputs for INT8xINT8 op"""

    data = request.param
    x = torch.randn(
        (data["bs"], data["seq_len"], data["hid_dim"]),
        dtype=data["dtype"],
    ).clamp(-1, 1)
    w_int = torch.randint(
        low=-8,
        high=8,
        size=(data["out_feat"], data["hid_dim"]),
        dtype=torch.int8,
    )
    b = torch.zeros(data["out_feat"], dtype=data["dtype"])
    qdata = create_qdata(
        data["wtype"],
        data["atype"],
        data["hid_dim"],
        data["out_feat"],
        data["smoothquant"],
        data["dtype"],
    )

    return (x, w_int, b, qdata, data["wtype"], data["atype"], data["smoothquant"])


def create_qdata(
    wtype: str,
    atype: str,
    in_feat: int,
    out_feat: int,
    smoothquant: bool,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Generate dummy qdata tensor based on the provided quantization configuration"""

    qdata_len = 2 if wtype == "per_tensor" else 2 * out_feat  # weight clips
    qdata_len += 2  # activation clips
    qdata_len += out_feat if atype == "per_tensor_asymm" else 1  # zero shift
    qdata_len += in_feat if smoothquant else 1  # smoothquant scales

    # TODO: improve dummy generation
    qdata = torch.ones(qdata_len, dtype=dtype)
    qdata[1] = -qdata[0]  # !!! temporary solution to enforce clip symmetry
    qdata[3] = -qdata[2]
    return qdata
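
For orientation, the qdata sizing logic in create_qdata can be checked by hand: the default i8i8_metadata entry (per_tensor weights, per_tensor_symm activations, no smoothquant) yields 2 + 2 + 1 + 1 = 6 entries. A minimal sketch of that arithmetic, using a hypothetical helper name (illustration only, not part of this commit):

# Hypothetical helper mirroring the qdata length computation in create_qdata above.
def expected_qdata_len(
    wtype: str, atype: str, in_feat: int, out_feat: int, smoothquant: bool
) -> int:
    n = 2 if wtype == "per_tensor" else 2 * out_feat  # weight clips
    n += 2  # activation clips
    n += out_feat if atype == "per_tensor_asymm" else 1  # zero shift
    n += in_feat if smoothquant else 1  # smoothquant scales
    return n

# Default test configuration: 2 + 2 + 1 + 1 = 6 entries.
assert expected_qdata_len("per_tensor", "per_tensor_symm", 256, 512, False) == 6
# Largest configuration: per_channel weights, asymmetric activations, smoothquant on.
assert expected_qdata_len("per_channel", "per_tensor_asymm", 256, 512, True) == 2 * 512 + 2 + 512 + 256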

tests/aiu_addons/test_gptq_addon.py

Lines changed: 21 additions & 41 deletions
@@ -1,57 +1,37 @@
-import pytest
+# Copyright The FMS Model Optimizer Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test suite for FMS addon for AIU, introducing GPTQ functionalities"""
+
+# Third Party
 import torch

+# Local
 from fms_mo.aiu_addons.gptq.gptq_aiu_op import register_aiu_gptq_op


-input_sizes = [
-    {
-        "bs": 4,
-        "seq_len": 32,
-        "hid_dim": 768,
-        "out_feat": 3072,
-        "n_grp": 6,
-    },
-]
-
-
-@pytest.fixture(params=input_sizes)
-def get_gptq_gemm_inputs(request):
-    sizes = request.param
-    compression_factor = 8  # = assume 4-bits compression
-
-    x = torch.randn(
-        (sizes["bs"], sizes["seq_len"], sizes["hid_dim"]), dtype=torch.float16
-    )
-    qweight = torch.randint(
-        low=0,
-        high=torch.iinfo(torch.int32).max,
-        size=(sizes["out_feat"], sizes["hid_dim"] // compression_factor),
-        dtype=torch.int32,
-    )
-    qzeros = 8 * torch.ones(
-        (sizes["n_grp"], sizes["out_feat"] // 8), dtype = torch.int32
-    )
-    scales = torch.randn(
-        (sizes["n_grp"], sizes["out_feat"]), dtype=torch.float16,
-    )
-    g_idx = torch.zeros(sizes["hid_dim"], dtype=torch.int32)
-
-    return (x, qweight, qzeros, scales, g_idx)
-
-
 def test_gptq_registration() -> None:
-    """Call the registration function of GPTQ W4A16 operation, to add it.
-    Note: registration must be called before other GPTQ tests.
+    """Call the registration function of GPTQ W4A16 operation, adding the op to torch
+    namespace.
+    Note: registration must be called before other GPTQ tests that use this op.
     """

     register_aiu_gptq_op()
     assert hasattr(torch.ops, "gptq_gemm")
     assert hasattr(torch.ops.gptq_gemm, "i4f16_fxinputs_aiu")
-    return


-def test_gptq_op(get_gptq_gemm_inputs) -> None:
+def test_gptq_op(get_gptq_gemm_inputs: tuple[torch.Tensor, ...]) -> None:
     """Validate output shapes of GPTQ W4A16 tensors.
     Note: this AIU-compatible operation only returns a zero tensor of the
     expected shape, it does not perform a real W4A16 matmul operation.
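
As a reading aid for the GPTQ fixture shapes above: the compression factor of 8 reflects eight 4-bit weights packed into each int32. A minimal sketch (illustrative, not part of this commit) assuming the default gptq_input_sizes entry from conftest.py:

# Tensor shapes produced by get_gptq_gemm_inputs for the default entry
# (bs=4, seq_len=5, hid_dim=256, out_feat=512, n_grp=4).
import torch

bs, seq_len, hid_dim, out_feat, n_grp = 4, 5, 256, 512, 4
compression_factor = 8  # eight 4-bit weights packed per int32

x = torch.randn(bs, seq_len, hid_dim, dtype=torch.float16)
qweight = torch.zeros(out_feat, hid_dim // compression_factor, dtype=torch.int32)
qzeros = torch.zeros(n_grp, out_feat // compression_factor, dtype=torch.int32)
scales = torch.randn(n_grp, out_feat, dtype=torch.float16)

assert x.shape == (4, 5, 256)
assert qweight.shape == (512, 32)
assert qzeros.shape == (4, 64)
assert scales.shape == (4, 512)
# A W4A16 matmul consuming these inputs returns shape x.shape[:-1] + (out_feat,) == (4, 5, 512).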
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
# Copyright The FMS Model Optimizer Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test suite for FMS addon for AIU, introducing INT8xINT8 functionalities"""

# Third Party
import torch

# Local
from fms_mo.aiu_addons.i8i8.i8i8_aiu_op import register_aiu_i8i8_op


def test_i8i8_registration() -> None:
    """Call the registration function of INT8xINT8 operation, adding the op to torch
    namespace.
    Note: registration must be called before other INT8 tests that use this op.
    """

    register_aiu_i8i8_op()
    assert hasattr(torch.ops, "fms_mo")
    assert hasattr(torch.ops.fms_mo, "i8i8_aiu")


def test_i8i8_op(
    get_i8i8_gemm_inputs: tuple[
        torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, str, str, bool
    ],
) -> None:
    """Validate output shapes of INT8xINT8 matmul.
    Computations are simulated, using quantized/dequantized tensors.
    """

    (
        x,
        weight,
        bias,
        qdata,
        weight_quant_type,
        activ_quant_type,
        smoothquant,
    ) = get_i8i8_gemm_inputs

    out = torch.ops.fms_mo.i8i8_aiu(
        x,
        weight,
        bias,
        qdata,
        weight_quant_type,
        activ_quant_type,
        smoothquant,
    )

    assert out.size() == torch.Size((x.size()[:-1] + (weight.size(0),)))
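
For reference, the shape arithmetic behind the final assertion, using the default i8i8_metadata entry from conftest.py (a minimal sketch, not part of this commit):

# Expected output size for bs=4, seq_len=7, hid_dim=256, out_feat=512.
import torch

x = torch.zeros(4, 7, 256, dtype=torch.float16)
weight = torch.zeros(512, 256, dtype=torch.int8)
expected = torch.Size(x.size()[:-1] + (weight.size(0),))
assert expected == torch.Size([4, 7, 512])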
