Skip to content

Commit 815a4ff

Browse files
authored
[Qwen3Next] Add FP8 Quantization Example (#1886)
SUMMARY: Added a Qwen3-Next FP8 quantization example. The resulting model is uploaded [here](https://huggingface.co/shanjiaz/qwen3-80b-fp8-dynamic). TEST PLAN: Tested locally. --------- Signed-off-by: shanjiaz <[email protected]>
1 parent b06bf56 commit 815a4ff

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from transformers import AutoModelForCausalLM, AutoTokenizer
2+
3+
from llmcompressor import oneshot
4+
from llmcompressor.modifiers.quantization import QuantizationModifier
5+
from llmcompressor.utils import dispatch_for_generation
6+
7+
# Identifier of the checkpoint that this example quantizes.
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct"

# Options forwarded to `from_pretrained` when loading the model.
# NOTE(review): trust_remote_code=True lets transformers run Python code
# shipped with the checkpoint repository -- only use it with trusted sources.
load_kwargs = {
    "torch_dtype": "auto",
    "low_cpu_mem_usage": True,
    "trust_remote_code": True,
}

# Load the model first, then the matching tokenizer, both from MODEL_ID.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
17+
18+
# Modules excluded from quantization. Entries prefixed with "re:" are
# presumably interpreted as regular expressions over module names -- confirm
# against the llmcompressor documentation.
ignored_modules = [
    "lm_head",
    "re:.*mlp.gate$",
    "re:.*mlp.shared_expert_gate$",
    "re:.*linear_attn.*",
]

# Quantization recipe: apply the FP8_DYNAMIC scheme to every "Linear" target
# except the modules listed above.
recipe = QuantizationModifier(
    targets=["Linear"],
    scheme="FP8_DYNAMIC",
    ignore=ignored_modules,
)
28+
29+
# Run the one-shot pass that applies `recipe` to `model`.
# No calibration dataset is passed to this call.
oneshot(recipe=recipe, model=model)
31+
32+
# Sanity check: generate a short continuation with the quantized model and
# print it between two banner lines so it is easy to spot in the log.
print("========== SAMPLE GENERATION ==============")
dispatch_for_generation(model)

# Tokenize the prompt, then move the ids to the model's device; this happens
# after the dispatch call above, as in the original ordering.
encoded_prompt = tokenizer("Hello my name is", return_tensors="pt")
input_ids = encoded_prompt.input_ids.to(model.device)

# Generate at most 20 new tokens and decode the first returned sequence.
output = model.generate(input_ids, max_new_tokens=20)
generated_text = tokenizer.decode(output[0])
print(generated_text)
print("==========================================")
41+
42+
# Output directory: the last path component of MODEL_ID (ignoring any trailing
# slash) with an "-FP8-Dynamic" suffix.
SAVE_DIR = f"{MODEL_ID.rstrip('/').rsplit('/', 1)[-1]}-FP8-Dynamic"

# Write the model (save_compressed=True, i.e. compressed-tensors format per
# the original comment) and then the tokenizer into the same directory.
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)

0 commit comments

Comments
 (0)