tests/quantization/bnb/test_4bit.py (2 additions, 2 deletions)

@@ -526,7 +526,7 @@ def test_moving_to_cpu_throws_warning(self):
         reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.",
         strict=True,
     )
-    def test_pipeline_device_placement_works_with_nf4(self):
+    def test_pipeline_cuda_placement_works_with_nf4(self):
         transformer_nf4_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
@@ -560,7 +560,7 @@ def test_pipeline_device_placement_works_with_nf4(self):
         ).to(torch_device)
 
         # Check if inference works.
-        _ = pipeline_4bit("table", max_sequence_length=20, num_inference_steps=2)
+        _ = pipeline_4bit(self.prompt, max_sequence_length=20, num_inference_steps=2)
 
         del pipeline_4bit
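The rename scopes the test name to CUDA placement, and the inference call now reuses the suite's shared self.prompt fixture instead of a hard-coded "table" string. For context, here is a minimal sketch of the pattern the test exercises: loading an NF4-quantized transformer and moving the assembled pipeline onto the GPU. The checkpoint id, pipeline class, and prompt below are illustrative assumptions, not the suite's exact fixtures.

import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, StableDiffusion3Pipeline

# Quantize only the transformer to 4-bit NF4.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model_id = "stabilityai/stable-diffusion-3-medium-diffusers"  # assumed checkpoint
transformer = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.float16,
)
pipeline = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    transformer=transformer,
    torch_dtype=torch.float16,
).to("cuda")  # the device placement the test asserts works with 4-bit weights

# A short sequence length and two steps keep the smoke test cheap.
_ = pipeline("a photo of a table", max_sequence_length=20, num_inference_steps=2)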
tests/quantization/bnb/test_mixed_int8.py (2 additions, 2 deletions)

@@ -492,7 +492,7 @@ def test_generate_quality_dequantize(self):
         self.assertTrue(max_diff < 1e-2)
 
         # 8bit models cannot be offloaded to CPU.
-        self.assertTrue(self.pipeline_8bit.transformer.device.type == "cuda")
+        self.assertTrue(self.pipeline_8bit.transformer.device.type == torch_device)
         # calling it again shouldn't be a problem
         _ = self.pipeline_8bit(
             prompt=self.prompt,
@@ -534,7 +534,7 @@ def test_pipeline_cuda_placement_works_with_mixed_int8(self):
         ).to(device)
 
         # Check if inference works.
-        _ = pipeline_8bit("table", max_sequence_length=20, num_inference_steps=2)
+        _ = pipeline_8bit(self.prompt, max_sequence_length=20, num_inference_steps=2)
 
         del pipeline_8bit
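In the second file, the hard-coded "cuda" string in the device assertion gives way to torch_device, so the check passes on whichever accelerator the test runner exposes, and the inference call likewise reuses self.prompt. Below is a rough sketch of why the comparison is portable; the resolution logic is a simplified stand-in for diffusers.utils.testing_utils.torch_device, not its actual implementation.

import torch

# Simplified stand-in for the torch_device test helper: pick the first
# available backend.
if torch.cuda.is_available():
    torch_device = "cuda"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"

linear = torch.nn.Linear(4, 4).to(torch_device)
# device.type is a plain string ("cuda", "mps", or "cpu"), so it compares
# directly against torch_device on any backend.
assert linear.weight.device.type == torch_device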