Skip to content

Commit 4e7969e

Browse files
authored
tests: skip non SM100/103 for grouped deepgemm (#1767)
<!-- .github/pull_request_template.md --> ## 📌 Description skip test_fp8_groupwise_group_deepgemm and test_fp8_groupwise_batch_deepgemm_masked where SM is not 100 or 103; add the corresponding checks in the library ## 🔍 Related Issues <!-- Link any related issues here --> ## 🚀 Pull Request Checklist Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete. ### ✅ Pre-commit Checks - [ ] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method). - [ ] I have installed the hooks with `pre-commit install`. - [ ] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues. > If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/). ## 🧪 Tests - [ ] Tests have been added or updated as needed. - [ ] All tests are passing (`unittest`, etc.). ## Reviewer Notes <!-- Optional: anything you'd like reviewers to focus on, concerns, etc. --> --------- Co-authored-by: jimmzhou <[email protected]>
1 parent 4fe837f commit 4e7969e

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

flashinfer/gemm.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3078,6 +3078,11 @@ def group_deepgemm_fp8_nt_groupwise(
30783078
"""
30793079
from flashinfer.deep_gemm import m_grouped_fp8_gemm_nt_contiguous
30803080

3081+
if not _match_sm_version(a.device, ["100", "103"]):
3082+
raise ValueError(
3083+
"m_grouped_fp8_gemm_nt_contiguous is only supported on SM100, SM103."
3084+
)
3085+
30813086
if out is None:
30823087
out_dtype = out_dtype or torch.bfloat16
30833088
out = torch.empty(a.shape[0], b.shape[1], dtype=out_dtype, device=a.device)
@@ -3206,6 +3211,11 @@ def batch_deepgemm_fp8_nt_groupwise(
32063211
"""
32073212
from flashinfer.deep_gemm import m_grouped_fp8_gemm_nt_masked
32083213

3214+
if not _match_sm_version(a.device, ["100", "103"]):
3215+
raise ValueError(
3216+
"m_grouped_fp8_gemm_nt_masked is only supported on SM100, SM103."
3217+
)
3218+
32093219
if out is None:
32103220
out_dtype = out_dtype or torch.bfloat16
32113221
out = torch.empty(

tests/test_groupwise_scaled_gemm_fp8.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,11 @@ def test_fp8_groupwise_group_deepgemm(
202202
group_size,
203203
out_dtype,
204204
):
205+
compute_capability = get_compute_capability(torch.device(device="cuda"))
206+
if compute_capability[0] != 10:
207+
pytest.skip(
208+
"group_deepgemm_fp8_nt_groupwise is only supported on SM100, SM103 in trtllm backend."
209+
)
205210
torch.random.manual_seed(0)
206211
m_per_group = m // group_size
207212
if m_per_group < 128:
@@ -245,6 +250,11 @@ def test_fp8_groupwise_batch_deepgemm_masked(
245250
group_size,
246251
out_dtype,
247252
):
253+
compute_capability = get_compute_capability(torch.device(device="cuda"))
254+
if compute_capability[0] != 10:
255+
pytest.skip(
256+
"batch_deepgemm_fp8_nt_groupwise is only supported on SM100, SM103."
257+
)
248258
torch.random.manual_seed(0)
249259
n, k = nk
250260
a = torch.randn((group_size, m, k), device="cuda", dtype=torch.float32)

0 commit comments

Comments
 (0)