
Commit de6057b

Additional test patches for HPU
Parent: 1bc28db

3 files changed: +21 −5 lines

tests/test_linear4bit.py (0 additions, 3 deletions)

@@ -294,9 +294,6 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 
-    if device == "hpu" and quant_type != "nf4":
-        pytest.skip("fp4 dequantization is not supported on HPU")
-
     # Has a strange regression on Linux aarch64 CPU in torch==2.6.0 when fullgraph=False.
     if (
         not fullgraph
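
The skip removed here follows the parametrize-plus-conditional-skip pattern used throughout this test suite. As a minimal, self-contained sketch of that pattern (the device list, quant types, test name, and toy assertion below are illustrative, not taken from the bitsandbytes suite):

import pytest
import torch


@pytest.mark.parametrize("device", ["cpu", "cuda", "hpu"])
@pytest.mark.parametrize("quant_type", ["nf4", "fp4"])
def test_roundtrip_sketch(device, quant_type):
    # Skip combinations the current machine or backend cannot run.
    if device == "cuda" and not torch.cuda.is_available():
        pytest.skip("CUDA is not available")
    if device == "hpu" and quant_type != "nf4":
        pytest.skip("this combination is not supported on HPU")

    # Placeholder body standing in for a real quantize/dequantize check.
    x = torch.randn(16, 16)
    torch.testing.assert_close(x, x.clone())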

tests/test_linear8bitlt.py (2 additions, 1 deletion)

@@ -257,7 +257,8 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
     ref_output = net(x)
 
     # Compile the model
-    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode)
+    compile_backend = "hpu_backend" if device == "hpu" else "inductor"
+    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode, backend=compile_backend)
 
     # Get output from compiled model
     with torch.no_grad():
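
To see the backend switch above in isolation, here is a minimal sketch. The toy model, input shape, and hard-coded device are placeholders (on a Gaudi machine the device string would be "hpu"); "inductor" is torch.compile's default backend, and "hpu_backend" is the name the patched test selects on HPU.

import torch
from torch import nn

device = "cpu"  # would come from the test parametrization; "hpu" on Gaudi hardware

net = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 8)).to(device)
x = torch.randn(4, 32, device=device)

# Inductor is the default torch.compile backend; Gaudi provides its own
# "hpu_backend", which is what the patched test picks when device == "hpu".
compile_backend = "hpu_backend" if device == "hpu" else "inductor"
compiled_net = torch.compile(net, backend=compile_backend)

with torch.no_grad():
    ref_output = net(x)
    out = compiled_net(x)

# The compiled model should match eager execution numerically.
torch.testing.assert_close(out, ref_output)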

tests/test_modules.py (19 additions, 1 deletion)

@@ -5,7 +5,7 @@
 from torch import nn
 
 import bitsandbytes as bnb
-from tests.helpers import get_available_devices, id_formatter
+from tests.helpers import get_available_devices, id_formatter, is_supported_on_hpu
 
 
 class MockArgs:
@@ -295,7 +295,13 @@ def test_kbit_backprop(device, module):
     torch.nn.init.kaiming_normal_(ref[0].weight)
     torch.nn.init.kaiming_normal_(ref[1].weight)
     ref[1].weight.requires_grad_(False)
+
     kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
+
+    if device == "hpu":
+        if isinstance(module, bnb.nn.LinearFP4):
+            pytest.skip("FP4 is not supported on HPU")
+
     kbit[0].weight.detach().copy_(ref[0].weight)
     kbit[1].weight.detach().copy_(ref[1].weight)
     kbit[0].bias.detach().copy_(ref[0].bias)
@@ -358,6 +364,12 @@ def test_kbit_backprop(device, module):
     ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
 )
 def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim, quant_storage):
+    if device == "hpu":
+        if embedding_class is bnb.nn.EmbeddingFP4:
+            pytest.skip("FP4 is not supported on HPU")
+        elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
+            pytest.skip("This configuration is not supported on HPU")
+
     num_embeddings = 128
 
     src_weight = (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(
@@ -403,6 +415,12 @@ def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim,
     ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
 )
 def test_embedding_error(device, embedding_class, input_shape, embedding_dim, quant_storage):
+    if device == "hpu":
+        if embedding_class is bnb.nn.EmbeddingFP4:
+            pytest.skip("FP4 is not supported on HPU")
+        elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
+            pytest.skip("This configuration is not supported on HPU")
+
     is_8bit = embedding_class is bnb.nn.Embedding8bit
 
     num_embeddings = 128
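
The new skips rely on is_supported_on_hpu from tests/helpers, whose body is not part of this diff. Judging only from its call site above, is_supported_on_hpu(quant_type, dtype, quant_storage), a gate of this shape might look roughly like the following; the specific rules here are an assumption for illustration, not the helper's actual implementation.

import torch


def is_supported_on_hpu(quant_type: str, dtype: torch.dtype, quant_storage: torch.dtype) -> bool:
    # Assumption: HPU kernels only cover NF4 quantization.
    if quant_type != "nf4":
        return False
    # Assumption: only a known-good set of compute dtypes is accepted.
    if dtype not in (torch.float32, torch.bfloat16):
        return False
    # Assumption: quantized weights are stored as uint8 or in the compute dtype.
    return quant_storage in (torch.uint8, dtype)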
