
Commit d69da65

add example
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 17decae commit d69da65

File tree

2 files changed, +84 -0 lines changed
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, Gemma3nForConditionalGeneration

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.utils import dispatch_for_generation

# Load model.
model_id = "google/gemma-3n-E2B-it"
model = Gemma3nForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Oneshot arguments
DATASET_ID = "flickr30k"
DATASET_SPLIT = {"calibration": "test[:512]"}
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048


# Define a oneshot data collator for multimodal inputs.
# Calibration runs one sample at a time, so each batch holds a single sample.
def data_collator(batch):
    assert len(batch) == 1
    return {key: torch.tensor(value) for key, value in batch[0].items()}


# Recipe: quantize Linear weights to W4A16, skipping the vision/audio towers,
# the multimodal embeddings, the lm_head, and Gemma 3n-specific modules
# (altup, laurel), which stay in full precision.
recipe = [
    GPTQModifier(
        targets="Linear",
        scheme="W4A16",
        ignore=[
            "re:.*embed_audio.*",
            "re:.*embed_vision.*",
            "re:.*audio_tower.*",
            "re:.*vision_tower.*",
            "re:.*altup.*",
            "re:.*lm_head.*",
            "re:.*laurel.*",
        ],
    ),
]

# Perform oneshot calibration and quantization.
oneshot(
    model=model,
    tokenizer=model_id,
    dataset=DATASET_ID,
    splits=DATASET_SPLIT,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    trust_remote_code_model=True,
    data_collator=data_collator,
)

# Confirm generations of the quantized model look sane.
print("========== SAMPLE GENERATION ==============")
dispatch_for_generation(model)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Please describe the animal in this image\n"},
            {"type": "image"},
        ],
    },
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
image_url = "http://images.cocodataset.org/train2017/000000231895.jpg"
raw_image = Image.open(requests.get(image_url, stream=True).raw)

# Note: compile is disabled: https://github.com/huggingface/transformers/issues/38333
inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
output = model.generate(**inputs, max_new_tokens=100, disable_compile=True)
print(processor.decode(output[0], skip_special_tokens=True))
print("==========================================")

# Save to disk compressed.
SAVE_DIR = model_id.rstrip("/").split("/")[-1] + "-W4A16-G128"
model.save_pretrained(SAVE_DIR, save_compressed=True)
processor.save_pretrained(SAVE_DIR)
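
Once saved with save_compressed=True, the checkpoint is in compressed-tensors format and can typically be loaded directly into vLLM for inference. A minimal text-only sketch, not part of this commit: it assumes vLLM is installed with compressed-tensors support, that your vLLM version supports Gemma 3n, and that the directory name matches the SAVE_DIR produced above.

from vllm import LLM, SamplingParams

# Load the compressed checkpoint written by save_pretrained(..., save_compressed=True).
llm = LLM(model="gemma-3n-E2B-it-W4A16-G128")

# Text-only smoke test; multimodal serving depends on vLLM's Gemma 3n support.
outputs = llm.generate(["Describe your favorite animal."], SamplingParams(max_tokens=64))
print(outputs[0].outputs[0].text)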

tests/llmcompressor/pipelines/sequential/ast_utils.py/test_auto_wrapper.py

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+# flake8: noqa
 import ast
 import textwrap
 from types import SimpleNamespace
