Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions tests/e2e/vLLM/configs/qwen3_w4a16_grouped_quant.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# End-to-end test case: quantize the model below with the referenced recipe.
# All values are plain strings except the integer sample count.

# Scheduling / classification of this test case.
cadence: 'nightly'
test_type: 'regression'

# Model under test and the dataset (id + split) used with it.
model: 'Qwen/Qwen3-30B-A3B'
dataset_id: 'HuggingFaceH4/ultrachat_200k'
dataset_split: 'train_sft'

# Scheme label, number of calibration samples, output directory name and
# the recipe file (path as written, presumably relative to the repo root).
scheme: 'W4A16_group'
num_calibration_samples: 20
save_dir: 'Qwen3-30B-A3B-W4A16-first-10'
recipe: 'tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_group_quant_first_10_layers.yaml'
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Recipe: 4-bit, symmetric, group-wise (group size 128) integer weight
# quantization of `Linear` modules via GPTQModifier, with activations left
# unquantized (null). Modules matched by `ignore` are skipped.
#
# NOTE(review): nesting below is reconstructed (weights, input_activations,
# output_activations and targets as siblings inside group_0) — confirm
# against the committed file.
quant_stage:
  quant_modifiers:
    GPTQModifier:
      ignore:
        - 'lm_head'
        # Ignore every layer with index >= 10 so that only layers 0-9 are
        # quantized. `[1-9][0-9]+` matches any index of two or more digits
        # (10, 47, 100, ...); the trailing `\.` keeps single-digit indices
        # 0-9 from matching. Single quotes: backslashes are literal.
        # NOTE(review): for MoE checkpoints, confirm whether the router
        # `mlp.gate` Linear modules in layers 0-9 should be ignored as well.
        - 're:.*model\.layers\.[1-9][0-9]+\..*'
      actorder: null
      config_groups:
        group_0:
          weights:
            num_bits: 4
            type: 'int'
            symmetric: true
            strategy: 'group'
            group_size: 128
          input_activations: null
          output_activations: null
          targets: ['Linear']
Loading