Skip to content

Commit 33b7216

Browse files
committed
update examples, enable
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent a2cfdda commit 33b7216

File tree

2 files changed

+10
-11
lines changed

2 files changed

+10
-11
lines changed

examples/quantization_w4a16/llama3_example.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
# Select number of samples. 512 samples is a good place to start.
2222
# Increasing the number of samples can improve accuracy.
23-
NUM_CALIBRATION_SAMPLES = 1
23+
NUM_CALIBRATION_SAMPLES = 512
2424
MAX_SEQUENCE_LENGTH = 2048
2525

2626
# Load dataset and preprocess.
@@ -66,15 +66,15 @@ def tokenize(sample):
6666
num_calibration_samples=NUM_CALIBRATION_SAMPLES,
6767
)
6868

69-
# Confirm generations of the quantized model look sane.
70-
print("\n\n")
71-
print("========== SAMPLE GENERATION ==============")
72-
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
73-
model.device
74-
)
75-
output = model.generate(input_ids, max_new_tokens=100)
76-
print(tokenizer.decode(output[0]))
77-
print("==========================================\n\n")
69+
# # Confirm generations of the quantized model look sane.
70+
# print("\n\n")
71+
# print("========== SAMPLE GENERATION ==============")
72+
# input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
73+
# model.device
74+
# )
75+
# output = model.generate(input_ids, max_new_tokens=100)
76+
# print(tokenizer.decode(output[0]))
77+
# print("==========================================\n\n")
7878

7979
# Save to disk compressed.
8080
SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-G128"

src/llmcompressor/modifiers/quantization/gptq/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ def calibrate_module(
305305
args: Tuple[torch.Tensor, ...],
306306
_output: torch.Tensor,
307307
):
308-
return
309308
"""
310309
Quantize a module's weight according to the GPTQ algorithm
311310

0 commit comments

Comments (0)