Commit 5c736a7

skip some fp4 tests on hpu
1 parent 5dae4a8 commit 5c736a7

3 files changed: +12 -0 lines changed

tests/test_autograd.py

Lines changed: 3 additions & 0 deletions
@@ -189,6 +189,9 @@ def test_matmul_4bit(
     if device == "cpu" and dtype != torch.float32 and any(req_grad) and torch.__version__ < (2, 6):
         pytest.xfail("mse_loss fp16 on CPU is not supported in torch < 2.6")

+    if device == "hpu" and quant_type != "nf4":
+        pytest.skip("HPU only supports nf4")
+
     for i in range(3):
         # normal multiply
         if funcs[0] in [torch.mm, torch.matmul]:

tests/test_linear4bit.py

Lines changed: 3 additions & 0 deletions
@@ -276,6 +276,9 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")

+    if device == "hpu" and quant_type != "nf4":
+        pytest.skip("fp4 dequantization is not supported on HPU")
+
     # Has a strange regression on Linux aarch64 CPU in torch==2.6.0 when fullgraph=False.
     if (
         not fullgraph

tests/test_ops.py

Lines changed: 6 additions & 0 deletions
@@ -179,6 +179,9 @@ def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         shape = (128, 128)

         n = prod(shape)
@@ -210,6 +213,9 @@ def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksi
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         out_features = 1024
         in_features = 256

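All three files apply the same guard: any parametrized case that pairs the hpu device with a quant type other than nf4 is skipped rather than left to fail. Below is a minimal, self-contained sketch of that pattern; the test name and parametrization are illustrative only and not part of this repository's suite, which derives device and the other parameters from its own fixtures.

import pytest

# Illustrative parametrization; the real tests cover more dtypes,
# storage dtypes, and blocksizes.
@pytest.mark.parametrize("device", ["cpu", "cuda", "hpu"])
@pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
def test_4bit_hpu_skip_pattern(device, quant_type):
    # Mirrors the guard added in this commit: HPU currently supports only
    # nf4, so fp4 cases are skipped instead of reported as failures.
    if device == "hpu" and quant_type != "nf4":
        pytest.skip("fp4 dequantization is not supported on HPU")

    # ... a real test would quantize/dequantize a tensor here ...

Skipped cases show up as "s" in pytest's progress output, so the fp4 gaps on HPU stay visible without turning the run red.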