Skip to content

Commit d1b00c7

Browse files
dsikka and kylesayrs authored
[Tests] Fix failing nightly quantization tests (#1744)
## Purpose ##

* Fix failing tests introduced by adding `dispatch_for_generation` to the data free pipeline

## Changes ##

* After oneshot, oneshot should remove any dispatches from the model. However, there is a bug fixed [here](neuralmagic/compressed-tensors#427) where models which fit entirely on one GPU do not have their dispatches removed (since they do not have hooks)
* As a result, we need to move weights to the same device before comparing them for `test_quantization_reload`
* The `test_perplexity` test was implicitly relying on the model being dispatched to GPUs. Now explicitly `dispatch_for_generation`, similar to how we do in our examples

## Testing ##

* Nightly and commit tests passed locally

---------

Signed-off-by: Kyle Sayers <[email protected]>
Co-authored-by: Kyle Sayers <[email protected]>
1 parent 4dec2c3 commit d1b00c7

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

tests/llmcompressor/transformers/compression/test_quantization.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from llmcompressor.args import DatasetArguments
1515
from llmcompressor.pytorch.utils import tensors_to_device
1616
from llmcompressor.transformers.finetune.data import TextGenerationDataset
17+
from llmcompressor.utils.dev import dispatch_for_generation
1718
from tests.testing_utils import parse_params, requires_gpu
1819

1920
CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/compression/configs"
@@ -37,7 +38,7 @@ def setUpClass(cls):
3738
cls.test_dir = tempfile.mkdtemp()
3839

3940
cls.model = AutoModelForCausalLM.from_pretrained(
40-
cls.model_stub, torch_dtype=cls.weight_dtype, device_map="cuda:0"
41+
cls.model_stub, torch_dtype=cls.weight_dtype
4142
)
4243
model = cls._run_oneshot(
4344
cls.model,
@@ -99,18 +100,19 @@ def test_quantization_reload(self):
99100
model_reloaded = AutoModelForCausalLM.from_pretrained(
100101
os.path.join(self.test_dir, self.output),
101102
torch_dtype="auto",
102-
device_map="cuda:0",
103103
)
104104

105105
og_weights, og_inputs = self._get_quant_info(self.model)
106106
reloaded_weights, reloaded_inputs = self._get_quant_info(model_reloaded)
107+
# TODO: can remove `to` calls after
108+
# https://github.com/neuralmagic/compressed-tensors/pull/427
107109

108110
for name, (o_scale, o_zp, o_weight) in og_weights.items():
109111
n_scale, n_zp, n_weight = reloaded_weights[name]
110112
assert o_scale.dtype == n_scale.dtype == self.weight_dtype
111-
assert torch.equal(o_scale, n_scale)
113+
assert torch.equal(o_scale, n_scale.to(o_scale.device))
112114
assert o_zp.dtype == n_zp.dtype
113-
assert torch.equal(o_zp, n_zp)
115+
assert torch.equal(o_zp, n_zp.to(o_zp.device))
114116

115117
# we don't expect an exact match here because o_weight still has the
116118
# original weight and n_weight has been fake_quantized
@@ -119,9 +121,9 @@ def test_quantization_reload(self):
119121
for name, (o_scale, o_zp) in og_inputs.items():
120122
n_scale, n_zp = reloaded_inputs[name]
121123
assert o_scale.dtype == n_scale.dtype == self.weight_dtype
122-
assert torch.equal(o_scale, n_scale)
124+
assert torch.equal(o_scale, n_scale.to(o_scale.device))
123125
assert o_zp.dtype == n_zp.dtype
124-
assert torch.equal(o_zp, n_zp)
126+
assert torch.equal(o_zp, n_zp.to(o_zp.device))
125127

126128
def _get_dataloader(self, dataset_args, tokenizer):
127129
dataset_manager = TextGenerationDataset.load_from_registry(
@@ -150,6 +152,7 @@ def test_perplexity(self):
150152
max_seq_length=self.max_seq_length,
151153
)
152154
dataloader = self._get_dataloader(dataset_args, tokenizer)
155+
dispatch_for_generation(self.model)
153156

154157
total_ppl = 0.0
155158
total_non_nan = 0

tests/testing_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
from tests.data import CustomTestConfig, TestConfig
1515

16+
TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", None)
17+
1618

1719
# TODO: probably makes sense to move this type of function to a more central place,
1820
# which can be used by __init__.py as well
@@ -78,6 +80,10 @@ def _parse_configs_dir(current_config_dir):
7880

7981
for file in os.listdir(current_config_dir):
8082
config_path = os.path.join(current_config_dir, file)
83+
if TEST_DATA_FILE is not None:
84+
if not config_path.endswith(TEST_DATA_FILE):
85+
continue
86+
8187
config = _load_yaml(config_path)
8288
if not config:
8389
continue

0 commit comments

Comments (0)