Skip to content

Commit c220611

Browse files
committed
Add kernels-test-utils shared Python package
Create a shared test utilities package that consolidates duplicated device detection, tolerance tables, and allclose helpers across all kernel repos. The package is automatically available in all kernel dev/test shells via the default pythonCheckInputs. Modules: - device: get_device(), get_available_devices(), skip_if_no_gpu() - tolerances: DEFAULT_TOLERANCES dict, get_tolerances(dtype) - allclose: fp8_allclose() with MPS float64 workaround Wired into nix overlay and set as default pythonCheckInputs in genKernelFlakeOutputs so downstream repos get it automatically. Updated template test to use kernels_test_utils imports. Co-developed-by: Claude Code v2.1.58 (claude-opus-4-6)
1 parent 9e2b45c commit c220611

File tree

9 files changed

+163
-11
lines changed

9 files changed

+163
-11
lines changed

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
# fail in a GPU-less sandbox. Even in that case, it's better to lazily
9191
# load the part with this functionality.
9292
doGetKernelCheck ? true,
93-
pythonCheckInputs ? pkgs: [ ],
93+
pythonCheckInputs ? pkgs: [ pkgs.kernels-test-utils ],
9494
pythonNativeCheckInputs ? pkgs: [ ],
9595
torchVersions ? _: torchVersions',
9696
}:

kernels-test-utils/pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Packaging metadata for the shared kernels-test-utils test-helper package.
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "kernels-test-utils"
version = "0.1.0"
requires-python = ">=3.10"
# pytest backs skip_if_no_gpu(); torch backs the device/tolerance/allclose helpers.
dependencies = ["pytest", "torch"]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""Shared test utilities for kernel repos."""

from kernels_test_utils.allclose import fp8_allclose
from kernels_test_utils.device import get_available_devices, get_device, skip_if_no_gpu
from kernels_test_utils.tolerances import DEFAULT_TOLERANCES, get_tolerances

# Public re-exports, sorted alphabetically (uppercase names sort first).
__all__ = [
    "DEFAULT_TOLERANCES",
    "fp8_allclose",
    "get_available_devices",
    "get_device",
    "get_tolerances",
    "skip_if_no_gpu",
]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Allclose variants that work around device limitations."""
2+
3+
import torch
4+
from torch._prims_common import TensorLikeType
5+
6+
7+
def fp8_allclose(
    a: TensorLikeType,
    b: TensorLikeType,
    rtol: float = 1e-05,
    atol: float = 1e-08,
    equal_nan: bool = False,
) -> bool:
    """``torch.allclose`` replacement that handles FP8 types and MPS.

    The tensors are promoted to float64 before comparison, except on MPS
    (which has no float64 support), where float32 is used instead.
    """
    # Reuse torch's own argument validation for allclose-style comparisons.
    torch._refs._check_close_args(name="torch.allclose", a=a, b=b, rtol=rtol, atol=atol)

    # Pick the widest dtype the devices involved can represent.
    compare_dtype = (
        torch.float32
        if "mps" in (a.device.type, b.device.type)
        else torch.float64
    )

    close = torch.isclose(
        a.to(compare_dtype),
        b.to(compare_dtype),
        rtol=rtol,
        atol=atol,
        equal_nan=equal_nan,
    )
    return bool(close.all().item())
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""Device detection utilities for kernel tests."""
2+
3+
from typing import List
4+
5+
import pytest
6+
import torch
7+
8+
9+
def get_device() -> torch.device:
    """Return the best available compute device (MPS > CUDA > XPU > CPU)."""
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")

    if torch.cuda.is_available():
        return torch.device("cuda")

    # torch.xpu only exists in builds with Intel accelerator support.
    xpu = getattr(torch, "xpu", None)
    if xpu is not None and xpu.is_available():
        return torch.device("xpu")

    return torch.device("cpu")
18+
19+
20+
def get_available_devices() -> List[str]:
    """Return device strings suitable for pytest parametrization.

    On MPS: ``["mps"]``
    On CUDA: ``["cuda:0", "cuda:1", ...]`` for each visible GPU.
    On XPU: ``["xpu:0", "xpu:1", ...]`` for each visible accelerator.
    Fallback: ``["cpu"]``
    """
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return ["mps"]

    if torch.cuda.is_available():
        # Guard against a zero count even though availability implies >= 1.
        num_gpus = max(1, torch.cuda.device_count())
        return [f"cuda:{index}" for index in range(num_gpus)]

    if hasattr(torch, "xpu") and torch.xpu.is_available():
        num_accels = max(1, torch.xpu.device_count())
        return [f"xpu:{index}" for index in range(num_accels)]

    return ["cpu"]
35+
36+
37+
def skip_if_no_gpu() -> None:
    """Call inside a test to skip when no GPU is available."""
    # get_device() falls back to CPU exactly when no accelerator was found.
    if get_device().type == "cpu":
        pytest.skip("No GPU device available")
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
"""Default tolerance tables for kernel tests."""
2+
3+
from typing import Dict
4+
5+
import torch
6+
7+
# Per-dtype comparison tolerances shared by all kernel test suites.
DEFAULT_TOLERANCES: Dict[torch.dtype, Dict[str, float]] = {
    torch.float32: {"atol": 1e-5, "rtol": 1e-5},
    torch.float16: {"atol": 1e-3, "rtol": 1e-3},
    torch.bfloat16: {"atol": 1e-2, "rtol": 1.6e-2},
}

# Loose fallback for dtypes without a dedicated entry (e.g. FP8 formats).
_FALLBACK_TOLERANCES: Dict[str, float] = {"atol": 0.1, "rtol": 0.1}


def get_tolerances(dtype: torch.dtype) -> Dict[str, float]:
    """Return ``{"atol": ..., "rtol": ...}`` for *dtype*.

    Falls back to ``atol=0.1, rtol=0.1`` for unknown dtypes.

    A fresh dict is returned on every call so callers may adjust the
    tolerances in place without mutating the shared module-level tables.
    """
    return dict(DEFAULT_TOLERANCES.get(dtype, _FALLBACK_TOLERANCES))

nix/overlay.nix

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ in
8383

8484
kernels = callPackage ./pkgs/python-modules/kernels { };
8585

86+
kernels-test-utils = callPackage ./pkgs/python-modules/kernels-test-utils { };
87+
8688
pyclibrary = python-self.callPackage ./pkgs/python-modules/pyclibrary { };
8789

8890
mkTorch = callPackage ./pkgs/python-modules/torch/binary { };
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Nix package for the shared kernels-test-utils Python test-helper package.
{
  lib,
  buildPythonPackage,
  setuptools,

  pytest,
  torch,
}:

let
  # Single source of truth: read the version out of the package's own
  # pyproject.toml instead of duplicating it here.
  version =
    (builtins.fromTOML (builtins.readFile ../../../../kernels-test-utils/pyproject.toml)).project.version;
in
buildPythonPackage {
  pname = "kernels-test-utils";
  inherit version;
  pyproject = true;

  # Restrict the source to .toml and .py files so unrelated files in the
  # package directory do not invalidate the build.
  src =
    let
      sourceFiles = file: file.hasExt "toml" || file.hasExt "py";
    in
    lib.fileset.toSource {
      root = ../../../../kernels-test-utils;
      fileset = lib.fileset.fileFilter sourceFiles ../../../../kernels-test-utils;
    };

  build-system = [ setuptools ];

  dependencies = [
    pytest
    torch
  ];

  # Smoke test: the installed package must at least be importable.
  pythonImportsCheck = [
    "kernels_test_utils"
  ];

  meta = with lib; {
    description = "Shared test utilities for kernel repos";
  };
}

template/tests/test___KERNEL_NAME_NORMALIZED__.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
1-
import platform
2-
31
import torch
42

3+
from kernels_test_utils import get_device
4+
55
import __KERNEL_NAME_NORMALIZED__
66

77

88
def test___KERNEL_NAME_NORMALIZED__():
9-
if platform.system() == "Darwin":
10-
device = torch.device("mps")
11-
elif hasattr(torch, "xpu") and torch.xpu.is_available():
12-
device = torch.device("xpu")
13-
elif torch.version.cuda is not None and torch.cuda.is_available():
14-
device = torch.device("cuda")
15-
else:
16-
device = torch.device("cpu")
9+
device = get_device()
1710

1811
x = torch.randn(1024, 1024, dtype=torch.float32, device=device)
1912
expected = x + 1.0

0 commit comments

Comments
 (0)