Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml"
ppl_threshold: 30000
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml"
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml"
ppl_threshold: 30000
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml"
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml"
ppl_threshold: 30000
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml"
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cadence: "nightly"
test_type: "regression"
model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
ppl_threshold: 20
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
ppl_threshold: 30000
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
cadence: "nightly"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
cadence: "nightly"
test_type: "regression"
compressed_model_stub: "nm-testing/tinyllama-w8a16-dense"
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
11 changes: 6 additions & 5 deletions tests/llmcompressor/transformers/compression/test_decompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tempfile
import unittest

import torch
from compressed_tensors import QUANTIZATION_CONFIG_NAME
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import QuantizationStatus
Expand Down Expand Up @@ -113,16 +114,16 @@ def test_hf_quantizer_decompress_match_manual_decompress(self):
)
inputs = inputs.to(self.decompressed_model_manual.device)

decompressed_model_manual_output = self.tokenizer.batch_decode(
self.decompressed_model_manual.generate(**inputs, max_length=50)
decompressed_model_manual_output = self.decompressed_model_manual.generate(
**inputs, max_length=50
)

decompressed_model_hf_quantizer_out = self.tokenizer.batch_decode(
decompressed_model_hf_quantizer_out = (
self.decompressed_model_hf_quantizer.generate(**inputs, max_length=50)
)

assert (
decompressed_model_hf_quantizer_out == decompressed_model_manual_output
assert torch.equal(
decompressed_model_hf_quantizer_out, decompressed_model_manual_output
)

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def tearDownClass(cls):

@staticmethod
def _run_oneshot(model, recipe, dataset, output_dir):
num_calibration_samples = 512
num_calibration_samples = 64
max_seq_length = 512
pad_to_max_length = False

Expand All @@ -68,7 +68,7 @@ def _run_oneshot(model, recipe, dataset, output_dir):
recipe=recipe,
pad_to_max_length=pad_to_max_length,
clear_sparse_session=False,
splits={"calibration": "train_gen[:5%]"},
splits={"calibration": "train_gen[:1%]"},
save_compressed=False,
)
return model
Expand Down Expand Up @@ -142,6 +142,8 @@ def _get_dataloader(self, data_args, tokenizer):

@torch.no_grad()
def test_perplexity(self):
if self.ppl_threshold is None:
pytest.skip("Skipping perplexity calculation.")
tokenizer = AutoTokenizer.from_pretrained(self.model_stub)
data_args = DatasetArguments(
dataset="ultrachat-200k",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tempfile
import unittest

import torch
from compressed_tensors import QUANTIZATION_CONFIG_NAME
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import QuantizationStatus
Expand Down Expand Up @@ -113,16 +114,16 @@ def test_hf_quantizer_decompress_match_manual_decompress(self):
)
inputs = inputs.to(self.decompressed_model_manual.device)

decompressed_model_manual_output = self.tokenizer.batch_decode(
self.decompressed_model_manual.generate(**inputs, max_length=50)
decompressed_model_manual_output = self.decompressed_model_manual.generate(
**inputs, max_length=50
)

decompressed_model_hf_quantizer_out = self.tokenizer.batch_decode(
decompressed_model_hf_quantizer_out = (
self.decompressed_model_hf_quantizer.generate(**inputs, max_length=50)
)

assert (
decompressed_model_hf_quantizer_out == decompressed_model_manual_output
assert torch.equal(
decompressed_model_hf_quantizer_out, decompressed_model_manual_output
)

@classmethod
Expand Down