32 | 32 | )
33 | 33 |
34 | 34 |
35 | | -@pytest.mark.parametrize("mkn", [64, 256, 1024, 4096])
| 35 | +@pytest.mark.parametrize("mkn", [64, 256, 1024])
36 | 36 | @pytest.mark.parametrize(
37 | 37 |     "dtype_to_test",
38 | 38 |     [
43 | 43 |         torch.float8_e5m2,
44 | 44 |     ],
45 | 45 | )
| 46 | +@pytest.mark.skipif(
| 47 | +    not torch.cuda.is_available(),
| 48 | +    reason="test_triton_matmul_fp can only run when GPU is available",
| 49 | +)
46 | 50 | def test_triton_matmul_fp(mkn, dtype_to_test):
47 | 51 |     """Parametric tests for triton matmul kernel using variety of tensor sizes and dtypes."""
48 | | -    if not torch.cuda.is_available():
49 | | -        # only run the test when GPU is available
50 | | -        return
51 | 52 |
52 | 53 |     torch.manual_seed(23)
53 | 54 |     m = n = k = mkn
@@ -79,12 +80,13 @@ def test_triton_matmul_fp(mkn, dtype_to_test):
79 | 80 |     assert torch.norm(diff_trun_8b) / torch.norm(torch_output) < 1e-3
80 | 81 |
81 | 82 |
82 | | -@pytest.mark.parametrize("mkn", [64, 256, 1024, 4096])
| 83 | +@pytest.mark.parametrize("mkn", [64, 256, 1024])
| 84 | +@pytest.mark.skipif(
| 85 | +    not torch.cuda.is_available(),
| 86 | +    reason="test_triton_matmul_int8 can only run when GPU is available",
| 87 | +)
83 | 88 | def test_triton_matmul_int8(mkn):
84 | 89 |     """Parametric tests for triton imatmul kernel using variety of tensor sizes."""
85 | | -    if not torch.cuda.is_available():
86 | | -        # only run the test when GPU is available
87 | | -        return
88 | 90 |
89 | 91 |     torch.manual_seed(23)
90 | 92 |     m = n = k = mkn
@@ -121,13 +123,14 @@ def test_triton_matmul_int8(mkn):
121 | 123 |
122 | 124 | @pytest.mark.parametrize("feat_in_out", [(64, 128), (256, 1024), (1024, 4096)])
123 | 125 | @pytest.mark.parametrize("trun_bits", [0, 8, 12, 16])
| 126 | +@pytest.mark.skipif(
| 127 | +    not torch.cuda.is_available(),
| 128 | +    reason="test_linear_fpx_acc can only run when GPU is available",
| 129 | +)
124 | 130 | def test_linear_fpx_acc(feat_in_out, trun_bits):
125 | 131 |     """Parametric tests for LinearFPxAcc. This Linear utilizes triton kernel hence can only be run
126 | 132 |     on CUDA.
127 | 133 |     """
128 | | -    if not torch.cuda.is_available():
129 | | -        # only run the test when GPU is available
130 | | -        return
131 | 134 |
132 | 135 |     torch.manual_seed(23)
133 | 136 |     feat_in, feat_out = feat_in_out
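
For reference, the diff above swaps the in-test early `return` guards for `@pytest.mark.skipif` decorators, so GPU-only tests are reported as skipped on CPU machines instead of silently passing. A minimal sketch of the same pattern, assuming only `pytest` and `torch`; the `requires_cuda` helper, the test name, and the `torch.matmul` stand-in for the triton kernel are illustrative, not part of this PR:

```python
import pytest
import torch

# Reusing one marker keeps the skip condition and reason consistent across tests.
requires_cuda = pytest.mark.skipif(
    not torch.cuda.is_available(),
    reason="this test can only run when a GPU is available",
)


@requires_cuda
@pytest.mark.parametrize("mkn", [64, 256, 1024])
def test_matmul_shapes(mkn):
    """On CPU-only runs pytest reports this as skipped rather than passed."""
    a = torch.randn(mkn, mkn, device="cuda")
    b = torch.randn(mkn, mkn, device="cuda")
    out = torch.matmul(a, b)
    assert out.shape == (mkn, mkn)
```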