Skip to content

Commit fe4e28a

Browse files
committed
update
1 parent 399fd4e commit fe4e28a

File tree

3 files changed

+38
-27
lines changed

3 files changed

+38
-27
lines changed

examples/quantization_w4a4_fp4/qwen3_5_moe.py

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datasets import load_dataset
33
from llmcompressor import oneshot
44
from llmcompressor.modifiers.quantization import QuantizationModifier
5+
import torch
56

67
MODEL_ID = "/raid/engine/dsikka/models--Qwen--Qwen3.5-397B-A17B/snapshots/7cad2bae11cb49ca79f7d6a0954de2e2756f4e27"
78

@@ -25,48 +26,57 @@
2526
],
2627
)
2728

28-
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
29-
DATASET_SPLIT = "train_sft"
30-
31-
# Select number of samples
29+
DATASET_ID = "neuralmagic/calibration"
3230
NUM_CALIBRATION_SAMPLES = 20
33-
MAX_SEQUENCE_LENGTH = 2048
31+
MAX_SEQUENCE_LENGTH = 8192
3432

35-
# Load dataset and preprocess.
36-
ds = load_dataset(DATASET_ID, split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES}]")
37-
ds = ds.shuffle(seed=42)
33+
# Load only the first NUM_CALIBRATION_SAMPLES rows of the "train" split from
# the "LLM" configuration of DATASET_ID.
ds = load_dataset(DATASET_ID, name="LLM", split=f"train[:{NUM_CALIBRATION_SAMPLES}]")
3834

3935

40-
def preprocess(example):
41-
return {
42-
"text": processor.apply_chat_template(
43-
example["messages"],
44-
tokenize=False,
36+
def preprocess_function(example):
    """Tokenize one chat example through the processor's chat template.

    Every message's ``content`` value is wrapped as a single ``text`` part
    before the template is applied.

    :param example: dataset row with a ``messages`` list whose items carry
        ``role`` and ``content`` keys
    :return: result of ``processor.apply_chat_template`` called with
        ``tokenize=True`` and ``return_dict=True``, truncated to
        ``MAX_SEQUENCE_LENGTH``
    """
    messages = [
        {
            "role": message["role"],
            "content": [{"type": "text", "text": message["content"]}],
        }
        for message in example["messages"]
    ]

    return processor.apply_chat_template(
        messages,
        return_tensors="pt",
        padding=False,
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
        tokenize=True,
        add_special_tokens=False,
        return_dict=True,
        add_generation_prompt=False,
    )
6157

6258

63-
ds = ds.map(tokenize, remove_columns=ds.column_names)
59+
# Run preprocess_function on one example at a time (batched=False) and drop
# the dataset's original columns so only the function's outputs remain.
ds = ds.map(preprocess_function, batched=False, remove_columns=ds.column_names)
60+
61+
62+
def data_collator(batch):
    """Convert a single calibration sample into a dict of tensors.

    :param batch: sequence holding exactly one sample, a mapping from feature
        name to a value accepted by ``torch.tensor``
    :return: dict with the same keys; every value is passed through
        ``torch.tensor``, and ``pixel_values`` is additionally created as
        bfloat16 and squeezed along dim 0
    :raises ValueError: if ``batch`` does not contain exactly one sample
    """
    # An explicit check (instead of ``assert``) still runs under ``python -O``,
    # so additional samples can never be silently dropped.
    if len(batch) != 1:
        raise ValueError(
            f"data_collator expects a batch of exactly 1 sample, got {len(batch)}"
        )

    sample = batch[0]
    return {
        key: (
            torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
            if key == "pixel_values"
            else torch.tensor(value)
        )
        for key, value in sample.items()
    }
72+
6473

6574

6675
# Apply quantization.
6776
# Run the one-shot pass with the recipe over the calibration dataset; samples
# are fed through data_collator.
oneshot(
    model=model,
    recipe=recipe,
    dataset=ds,
    data_collator=data_collator,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    moe_calibrate_all_experts=True,
)

src/llmcompressor/modeling/qwen3_5_vl_moe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(
2222
self,
2323
original: "Qwen3_5MoeSparseMoeBlock",
2424
config: "Qwen3_5MoeConfig",
25-
calibrate_all_experts: bool,
25+
calibrate_all_experts: bool = True,
2626
):
2727
super().__init__()
2828
text_config: "Qwen3_5MoeTextConfig" = config.get_text_config()
@@ -33,6 +33,7 @@ def __init__(
3333
self.shared_expert_gate = original.shared_expert_gate
3434
self.gate = original.gate
3535
self.experts = SequentialQwen3VLMoeTextExperts(text_config, original.experts)
36+
self.calibrate_all_experts = calibrate_all_experts
3637

3738
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
3839
batch_size, sequence_length, hidden_dim = hidden_states.shape

src/llmcompressor/utils/pytorch/module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def get_no_split_params(model: PreTrainedModel) -> Union[str, List[str]]:
337337
338338
:return: list of class names that shouldn't be split
339339
"""
340-
no_split_modules = model._get_no_split_modules("auto")
340+
no_split_modules = model._no_split_modules
341341
if len(no_split_modules) <= 0:
342342
return ALL_TARGET
343343

0 commit comments

Comments
 (0)