Commit 45dd501
Merge branch 'main' into mx_impl
Signed-off-by: chichun-charlie-liu <[email protected]>
2 parents 764d7bc + 4705c75

File tree

18 files changed: +715 −201 lines

.github/workflows/labelpr.yaml

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ jobs:
       github-token: ${{ secrets.GITHUB_TOKEN }}
       script: |
         // https://github.com/commitizen/conventional-commit-types
-        const valid_pr_types = ['feat', 'fix', 'docs', 'style', 'refactor', 'perf', 'test', 'build', 'ci', 'chore', 'revert'];
+        const valid_pr_types = ['feat', 'fix', 'docs', 'style', 'refactor', 'perf', 'test', 'build', 'ci', 'chore', 'revert', 'dependencies'];


         const title = context.payload.pull_request.title;
@@ -28,4 +28,4 @@ jobs:
         const labels = context.payload.pull_request.labels;
         const new_labels = labels.filter(label => !valid_pr_types.includes(label.name)); // keep all labels that are not in valid_pr_types
         new_labels.push({name: pr_type});
-        await github.rest.issues.update({ ...context.repo, issue_number: context.payload.number, labels: new_labels });
+        await github.rest.issues.update({ ...context.repo, issue_number: context.payload.number, labels: new_labels });

.github/workflows/pypi.yml

Lines changed: 3 additions & 3 deletions
@@ -44,7 +44,7 @@ jobs:
           # for setuptools-scm
           fetch-depth: 0

-      - uses: hynek/build-and-inspect-python-package@v2
+      - uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0

   # push to Test PyPI on
   # - a new GitHub release is published
@@ -77,7 +77,7 @@ jobs:
           path: dist

       - name: Upload to Test PyPI
-        uses: pypa/gh-action-pypi-publish@15c56dba361d8335944d31a2ecd17d700fc7bcbc # v1.12.2
+        uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
         with:
           repository-url: https://test.pypi.org/legacy/

@@ -122,4 +122,4 @@ jobs:
         run: rm ./dist/*.sigstore.json

       - name: Upload to PyPI
-        uses: pypa/gh-action-pypi-publish@15c56dba361d8335944d31a2ecd17d700fc7bcbc # v1.12.2
+        uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
@@ -40,9 +40,9 @@ jobs:
     strategy:
       matrix:
         python:
-          - "3.9"
           - "3.10"
           - "3.11"
+          - "3.12"
         platform:
           - "ubuntu-latest"

fms_mo/aiu_addons/gptq/gptq_aiu_op.py

Lines changed: 59 additions & 10 deletions
@@ -17,6 +17,7 @@
 import logging

 # Third Party
+from packaging.version import Version
 import torch

 # pylint: disable=unused-argument
@@ -25,6 +26,36 @@
 logger = logging.getLogger(__name__)


+def implement_op_decorator(op_namespace_id):
+    """Version-dependent decorator for custom op implementation.
+    Always compare against pytorch version in current environment.
+    """
+
+    torch_version = Version(torch.__version__.split("+", maxsplit=1)[0])
+
+    def decorator(func):
+        if torch_version < Version("2.4"):
+            return torch.library.impl(op_namespace_id, "default")(func)
+        return torch.library.custom_op(op_namespace_id, mutates_args=())(func)
+
+    return decorator
+
+
+def register_op_decorator(op_namespace_id):
+    """Version-dependent decorator for custom op registration.
+    Always compare against pytorch version in current environment.
+    """
+
+    torch_version = Version(torch.__version__.split("+", maxsplit=1)[0])
+
+    def decorator(func):
+        if torch_version < Version("2.4"):
+            return torch.library.impl_abstract(op_namespace_id)(func)
+        return torch.library.register_fake(op_namespace_id)(func)
+
+    return decorator
+
+
 def register_aiu_gptq_op():
     """Register AIU-specific op to enable torch compile without graph break.
     The op preserves I/O shapes of a `X @ W^T` matmul but performs no operation.
@@ -36,17 +67,33 @@ def register_aiu_gptq_op():
     ):
         logger.warning("AIU op has already been registered")
         return
-
     op_namespace_id = "gptq_gemm::i4f16_fxinputs_aiu"
-    torch.library.define(
-        op_namespace_id,
-        "(Tensor x, Tensor qw, Tensor qzeros, Tensor scales, Tensor g_idx) -> Tensor",
-    )
+    if Version(torch.__version__.split("+", maxsplit=1)[0]) < Version("2.4"):
+        torch.library.define(
+            op_namespace_id,
+            "(Tensor x, Tensor qw, Tensor qzeros, "
+            "Tensor scales, Tensor g_idx) -> Tensor",
+        )

     # Add implementations for the operator
-    @torch.library.impl(op_namespace_id, "default")
-    def i4f16_fxinputs_aiu(x, qw, qzeros, scales, g_idx):
-        # on AIU, GPTQ qw is [out_feat, in_feat]
+    @implement_op_decorator(op_namespace_id)
+    def i4f16_fxinputs_aiu(
+        x: torch.Tensor,
+        qw: torch.Tensor,
+        qzeros: torch.Tensor,
+        scales: torch.Tensor,
+        g_idx: torch.Tensor,
+    ) -> torch.Tensor:
+        """Implement fake processing of GPTQ W4A16 matmul. The purpose is to create a
+        node on the computational graph to be captured during compiling for AIU.
+
+        Instead of computing the weight decompression and matmul, this function returns
+        a zero tensor with the expected shape.
+
+        NOTE: on AIU, GPTQ qw is [out_feat, in_feat], while AutoGPTQ saves the quantized
+        weights as [in_feat, out_feat]
+        """
+
         outshape = x.shape[:-1] + (qw.shape[0],)
         x = x.view(-1, x.shape[-1])
         output = torch.zeros(
@@ -56,8 +103,10 @@ def i4f16_fxinputs_aiu(x, qw, qzeros, scales, g_idx):
         )
         return output.view(outshape)

-    @torch.library.impl_abstract(op_namespace_id)
-    def i4f16_fxinputs_aiu_abstract(x, qw, qzeros, scales, g_idx):
+    @register_op_decorator(op_namespace_id)
+    def _(x, qw, qzeros, scales, g_idx):
+        """OP template of I/O sizes"""
+
         outshape = x.shape[:-1] + (qw.shape[0],)
         return torch.empty(
             outshape,
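
This diff gates two torch.library APIs on the installed PyTorch version: before 2.4 it uses torch.library.define/impl/impl_abstract, and from 2.4 on it uses torch.library.custom_op/register_fake. Below is a minimal, self-contained sketch of the same version-gating pattern; the "demo::fake_matmul" namespace and every name in it are hypothetical illustrations, not part of this commit.

# Sketch of the version-gated custom-op pattern (hypothetical "demo" namespace).
from packaging.version import Version
import torch

TORCH_VERSION = Version(torch.__version__.split("+", maxsplit=1)[0])
OP_ID = "demo::fake_matmul"

if TORCH_VERSION < Version("2.4"):
    # Pre-2.4 API: declare the schema, then attach real and abstract impls.
    torch.library.define(OP_ID, "(Tensor x, Tensor w) -> Tensor")
    impl_decorator = torch.library.impl(OP_ID, "default")
    fake_decorator = torch.library.impl_abstract(OP_ID)
else:
    # 2.4+ API: custom_op infers the schema from type hints;
    # register_fake supplies the shape-only impl used during tracing.
    impl_decorator = torch.library.custom_op(OP_ID, mutates_args=())
    fake_decorator = torch.library.register_fake(OP_ID)

@impl_decorator
def fake_matmul(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
    # Preserve the output shape of `x @ w.T` but compute nothing,
    # mirroring the zero-returning AIU placeholder op above.
    return torch.zeros(x.shape[:-1] + (w.shape[0],), dtype=x.dtype, device=x.device)

@fake_decorator
def _(x, w):
    return torch.empty(x.shape[:-1] + (w.shape[0],), dtype=x.dtype, device=x.device)

out = torch.ops.demo.fake_matmul(torch.randn(2, 8), torch.randn(4, 8))
assert out.shape == (2, 4)

Either branch ends with a callable op that torch.compile can capture without a graph break, which is the point of the placeholder implementation in this file.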

fms_mo/aiu_addons/i8i8/i8i8_aiu_adapter.py

Lines changed: 17 additions & 36 deletions
@@ -97,41 +97,22 @@ def _add_defaults_and_concat(
     )


-# registration of new adapter steps for each architecture
-serialization.register_adapter_step("llama", "int8_qparams_aiu", _int8_qparams_aiu)
-serialization.register_adapter_step(
-    "gpt_bigcode", "int8_qparams_aiu", _int8_qparams_aiu
-)
-serialization.register_adapter_step("roberta", "int8_qparams_aiu", _int8_qparams_aiu)
-serialization.register_adapter_step(
-    "roberta_question_answering",
-    "int8_qparams_aiu",
-    _int8_qparams_aiu,
-)
-
-# registration of multi-step adapter for each architecture
-serialization.register_adapter(
+# registration of new adapter step and adapter for each architecture
+for arch in [
     "llama",
-    "fms_mo",
-    [
-        "hf_to_fms_names",
-        "hf_to_fms_rope",
-        "weight_fusion",
-        "int8_qparams_aiu",
-    ],
-)
-serialization.register_adapter(
-    "gpt_bigcode", "fms_mo", ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
-)
-serialization.register_adapter(
-    "roberta", "fms_mo", ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
-)
-serialization.register_adapter(
+    "gpt_bigcode",
+    "granite",
+    "roberta",
     "roberta_question_answering",
-    "fms_mo",
-    [
-        "hf_to_fms_names",
-        "weight_fusion",
-        "int8_qparams_aiu",
-    ],
-)
+]:
+    serialization.register_adapter_step(arch, "int8_qparams_aiu", _int8_qparams_aiu)
+    if arch in ["llama", "granite"]:
+        steps_to_register = [
+            "hf_to_fms_names",
+            "hf_to_fms_rope",
+            "weight_fusion",
+            "int8_qparams_aiu",
+        ]
+    else:
+        steps_to_register = ["hf_to_fms_names", "weight_fusion", "int8_qparams_aiu"]
+    serialization.register_adapter(arch, "fms_mo", steps_to_register)
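
For reference, one iteration of the loop above expands to two plain calls; shown here for "granite", the architecture this commit adds, which takes the same step list as "llama" (including the RoPE step):

# What the loop registers for the newly added "granite" architecture
# (serialization and _int8_qparams_aiu come from this same file).
serialization.register_adapter_step("granite", "int8_qparams_aiu", _int8_qparams_aiu)
serialization.register_adapter(
    "granite",
    "fms_mo",
    ["hf_to_fms_names", "hf_to_fms_rope", "weight_fusion", "int8_qparams_aiu"],
)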
