
Commit 49b0f5d

update
1 parent e59f957 commit 49b0f5d


3 files changed: +17 additions, -17 deletions


tests/quantization/bnb/test_4bit.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -877,7 +877,7 @@ class Bnb4BitCompileTests(QuantCompileTests, unittest.TestCase):
     @property
     def quantization_config(self):
         return PipelineQuantizationConfig(
-            quant_backend="bitsandbytes_8bit",
+            quant_backend="bitsandbytes_4bit",
             quant_kwargs={
                 "load_in_4bit": True,
                 "bnb_4bit_quant_type": "nf4",
```

tests/quantization/test_torch_compile_utils.py

Lines changed: 12 additions & 12 deletions
```diff
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import gc
+import inspect

 import torch

@@ -54,19 +55,16 @@ def _test_torch_compile(self, torch_dtype=torch.bfloat16):
         # `fullgraph=True` ensures no graph breaks
         pipe.transformer.compile(fullgraph=True)

-        with torch._dynamo.config.patch(error_on_recompile=True):
-            for _ in range(2):
-                # small resolutions to ensure speedy execution.
-                pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

     def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
         pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         pipe.enable_model_cpu_offload()
         pipe.transformer.compile()

-        for _ in range(2):
-            # small resolutions to ensure speedy execution.
-            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

     def _test_torch_compile_with_group_offload_leaf(self, torch_dtype=torch.bfloat16, *, use_stream: bool = False):
         torch._dynamo.config.cache_size_limit = 1000
```
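The removed block was a recompilation guard: `torch._dynamo.config.patch(error_on_recompile=True)` makes Dynamo raise if a later call triggers recompilation, and the two-iteration loop asserted that the second run hit the compiled graph unchanged. A standalone sketch of that pattern (the compiled function here is a stand-in, not from this repo):

```python
import torch

# Toy compiled function standing in for the pipeline's transformer.
@torch.compile(fullgraph=True)
def double(x: torch.Tensor) -> torch.Tensor:
    return x * 2

# error_on_recompile=True turns any recompilation into a hard failure,
# so calling twice with identical inputs asserts compile-cache stability.
with torch._dynamo.config.patch(error_on_recompile=True):
    for _ in range(2):
        double(torch.ones(4))  # same shape/dtype both times -> no recompile
```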
```diff
@@ -85,15 +83,17 @@ def _test_torch_compile_with_group_offload_leaf(self, torch_dtype=torch.bfloat16
             if torch.device(component.device).type == "cpu":
                 component.to("cuda")

-        for _ in range(2):
-            # small resolutions to ensure speedy execution.
-            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)

     def test_torch_compile(self):
         self._test_torch_compile()

     def test_torch_compile_with_cpu_offload(self):
         self._test_torch_compile_with_cpu_offload()

-    def test_torch_compile_with_group_offload_leaf(self):
-        self._test_torch_compile_with_group_offload_leaf()
+    def test_torch_compile_with_group_offload_leaf(self, use_stream=False):
+        for cls in inspect.getmro(self.__class__):
+            if "test_torch_compile_with_group_offload_leaf" in cls.__dict__ and cls is not QuantCompileTests:
+                return
+        self._test_torch_compile_with_group_offload_leaf(use_stream=use_stream)
```
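The new base test walks the MRO and returns early when a subclass defines its own `test_torch_compile_with_group_offload_leaf` (for example, the parameterized one in the torchao tests below), so the shared body does not run both from the base class and from the override. A self-contained sketch of the same guard (class and method names here are illustrative):

```python
import inspect

class BaseSuite:
    def test_feature(self):
        # Return early when any subclass redefines test_feature, so the
        # shared body does not run twice (once here, once in the override).
        for cls in inspect.getmro(self.__class__):
            if "test_feature" in cls.__dict__ and cls is not BaseSuite:
                return "base body skipped: subclass override detected"
        return "base body ran"

class CustomSuite(BaseSuite):
    def test_feature(self):
        # Subclass override; invoking the base body from here shows the
        # guard seeing the override on self.__class__ and bailing out.
        return BaseSuite.test_feature(self)

print(BaseSuite().test_feature())    # base body ran
print(CustomSuite().test_feature())  # base body skipped: subclass override detected
```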

tests/quantization/torchao/test_torchao.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -645,8 +645,9 @@ def quantization_config(self):
     )
     def test_torch_compile_with_cpu_offload(self):
         # RuntimeError: _apply(): Couldn't swap Linear.weight
-        super()._test_torch_compile_with_cpu_offload()
+        super().test_torch_compile_with_cpu_offload()

+    @parameterized.expand([False, True])
     @unittest.skip(
         """
         For `use_stream=False`:
@@ -656,8 +657,7 @@ def test_torch_compile_with_cpu_offload(self):
         Using non-default stream requires ability to pin tensors. AQT does not seem to support this yet in TorchAO.
         """
     )
-    @parameterized.expand([False, True])
-    def test_torch_compile_with_group_offload_leaf(self):
+    def test_torch_compile_with_group_offload_leaf(self, use_stream):
         # For use_stream=False:
         # If we run group offloading without compilation, we will see:
         # RuntimeError: Attempted to set the storage of a tensor on device "cpu" to a storage on different device "cuda:0". This is no longer allowed; the devices must match.
@@ -670,7 +670,7 @@ def test_torch_compile_with_group_offload_leaf(self):

         # For use_stream=True:
         # NotImplementedError: AffineQuantizedTensor dispatch: attempting to run unimplemented operator/function: func=<OpOverload(op='aten.is_pinned', overload='default')>, types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), arg_types=(<class 'torchao.dtypes.affine_quantized_tensor.AffineQuantizedTensor'>,), kwarg_types={}
-        super()._test_torch_compile_with_group_offload_leaf()
+        super()._test_torch_compile_with_group_offload_leaf(use_stream=use_stream)


 # Slices for these tests have been obtained on our aws-g6e-xlarge-plus runners
```
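Moving `@parameterized.expand` above `@unittest.skip` matters because decorators apply bottom-up: the decorator written closest to the function is applied first, and the one written at the top wraps the combined result, so `expand` now stamps out its parameterized variants from the already-skip-marked function. A minimal, dependency-free sketch of that ordering rule (decorator names are illustrative):

```python
# Decorators apply bottom-up: the decorator written closest to the function
# runs first, and the one written at the top wraps the combined result.
def tag(label):
    def deco(fn):
        def wrapper(*args, **kwargs):
            return f"{label}({fn(*args, **kwargs)})"
        return wrapper
    return deco

@tag("outer")  # applied second: wraps the already-"inner"-tagged function
@tag("inner")  # applied first: wraps f directly
def f():
    return "f"

assert f() == "outer(inner(f))"
```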
