Skip to content

Commit 5bd51df

Browse files
updated example
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent a88ca3c commit 5bd51df

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

examples/transform/llama3_example.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from llmcompressor.utils import dispatch_for_generation
88

99
# Select model and load it.
10-
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
10+
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct" # "meta-llama/Meta-Llama-3-8B-Instruct"
1111

1212
model = AutoModelForCausalLM.from_pretrained(
1313
MODEL_ID,
@@ -57,6 +57,10 @@ def tokenize(sample):
5757
# Configure the quantization algorithm to run.
5858
# * quantize the weights to 4 bit with GPTQ with a group size 128
5959
recipe = [
60+
# TODO preset_config="LLAMA_SPINQUANT_R1R2" outputs gibberish
61+
# TODO preset_config="QUIP_ONLINE" outputs gibberish
62+
# preset_config="QUIP" output sensible, but cannot load saved
63+
# checkpoint or run evals (~4hrs to run)
6064
TransformModifier(preset_config="LLAMA_SPINQUANT_R1R2"),
6165
QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
6266
]
@@ -72,12 +76,12 @@ def tokenize(sample):
7276
)
7377

7478
# # Confirm generations of the quantized model look sane.
75-
# print("\n\n")
76-
# print("========== SAMPLE GENERATION ==============")
77-
# dispatch_for_generation(model)
78-
# input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
79-
# output = model.generate(input_ids, max_new_tokens=100)
80-
# print(tokenizer.decode(output[0]))
79+
print("\n\n")
80+
print("========== SAMPLE GENERATION ==============")
81+
dispatch_for_generation(model)
82+
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
83+
output = model.generate(input_ids, max_new_tokens=100)
84+
print(tokenizer.decode(output[0]))
8185
# print("==========================================\n\n")
8286

8387
# Save to disk compressed.

0 commit comments

Comments (0)