Skip to content

Commit d02b536

Browse files
More test fixes
1 parent 0410ec1 commit d02b536

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

bitsandbytes/functional.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -779,7 +779,7 @@ def quantize_blockwise(
779779
state2=state2,
780780
)
781781
else:
782-
quant_state = QuantState(absmax=_absmax, code=code, blocksize=blocksize, dtype=A.dtype)
782+
quant_state = QuantState(absmax=_absmax, code=code.to(A.device), blocksize=blocksize, dtype=A.dtype)
783783

784784
# TODO(matthewdouglas): Deprecate out kwarg
785785
out = out.copy_(_out) if out is not None else _out

tests/test_linear4bit.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,11 @@
2424
@pytest.mark.parametrize("quant_type", ["nf4", "fp4"])
2525
@pytest.mark.parametrize("save_before_forward", TRUE_FALSE, ids=id_formatter("save_before_forward"))
2626
def test_linear_serialization(device, quant_type, compress_statistics, bias, quant_storage, save_before_forward):
27-
if device == "cpu" and quant_type == "fp4":
28-
pytest.xfail("FP4 is not supported for CPU")
27+
if device == "cpu":
28+
if quant_type == "fp4":
29+
pytest.xfail("FP4 is not supported for CPU")
30+
if quant_storage != "uint8":
31+
pytest.xfail("Only uint8 storage is supported for CPU")
2932

3033
original_dtype = torch.float16
3134
compute_dtype = None
@@ -144,8 +147,9 @@ def test_linear_serialization(device, quant_type, compress_statistics, bias, qua
144147
linear_q3 = torch_load_from_buffer(bytes_4bit)
145148

146149
# Test moving to CPU and back to GPU
147-
linear_q2.to("cpu")
148-
linear_q2.to(device)
150+
if device != "cpu":
151+
linear_q2.to("cpu")
152+
linear_q2.to(device)
149153
d = linear_qs(x)
150154
assert c.dtype == d.dtype
151155
assert c.device == d.device

0 commit comments

Comments (0)