
Commit 183364e

[examples] fix vision_tower/multi_modal_projector regexes (#1871)
SUMMARY: Resolves #1652. Our multimodal examples all ignore `"re:vision_tower.*"`, but this misses cases where the name is prefixed with something else (e.g. `model.vision_tower`). This PR loosens the regexes so that anything may precede `vision_tower` or `multi_modal_projector` and still be caught by the ignore; layers beginning with `vision_tower`, without a prefix, will still be caught. Also includes some formatting fixes, which must not be enforced on `examples/` as part of CI/CD checks.

TEST PLAN: Running `llm-compressor/examples/multimodal_vision/mistral3_example.py` on latest main shows we are quantizing layers we don't want to be:

```
2025-09-26T20:02:43.571160+0000 | compress_modules | INFO - Quantizing model.vision_tower.transformer.layers.4.feed_forward.gate_proj using 512 samples
```

After these changes, those layers no longer appear in the logs.

---------

Signed-off-by: Brian Dellabetta <[email protected]>
Co-authored-by: Fynn Schmitt-Ulms <[email protected]>
1 parent fae9429 commit 183364e
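
To make the regex fix concrete, here is a minimal sketch, assuming the `re:` ignore patterns are applied to fully qualified module names with `re.match`-style semantics (anchored at the start of the string, an assumption about the matching internals rather than something this diff confirms). That anchoring is why the old patterns miss prefixed names:

```python
import re

# One unprefixed and one `model.`-prefixed module name, mirroring the
# layer from the test-plan log above.
names = [
    "vision_tower.transformer.layers.4.feed_forward.gate_proj",
    "model.vision_tower.transformer.layers.4.feed_forward.gate_proj",
]

old_pattern = r"vision_tower.*"    # anchored at the start by re.match
new_pattern = r".*vision_tower.*"  # loosened: any prefix is allowed

for name in names:
    print(
        name,
        "old:", bool(re.match(old_pattern, name)),
        "new:", bool(re.match(new_pattern, name)),
    )

# vision_tower.transformer.layers.4...        old: True   new: True
# model.vision_tower.transformer.layers.4...  old: False  new: True
```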

File tree

13 files changed: +31 −25 lines changed

examples/multimodal_vision/README.md
Lines changed: 1 addition & 1 deletion

````diff
@@ -37,7 +37,7 @@ recipe = [
         targets="Linear",
         scheme="W4A16",
         sequential_targets=["MistralDecoderLayer"],
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
 ```
````

examples/multimodal_vision/llama4_example.py
Lines changed: 7 additions & 5 deletions

```diff
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
```
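
The collator rewrite above only adds parentheses around the conditional expression so it nests cleanly inside the dict comprehension; behavior is unchanged. A self-contained sketch of the resulting collator with a hypothetical one-sample batch (key names and shapes assumed for illustration; real keys come from the HF processor):

```python
import torch


def data_collator(batch):
    # Same logic as the example after this change.
    assert len(batch) == 1
    return {
        key: (
            torch.tensor(value)
            if key != "pixel_values"
            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
        )
        for key, value in batch[0].items()
    }


# Hypothetical one-sample batch.
sample = {
    "input_ids": [[1, 2, 3]],
    "pixel_values": [[[[0.0, 0.5], [1.0, 0.25]]]],
}
out = data_collator([sample])
print(out["input_ids"].shape)     # torch.Size([1, 3])
print(out["pixel_values"].dtype)  # torch.bfloat16
```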

examples/multimodal_vision/llava_example.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
```

examples/multimodal_vision/mistral3_example.py
Lines changed: 6 additions & 4 deletions

```diff
@@ -31,9 +31,11 @@
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=model.dtype)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=model.dtype)
+        )
         for key, value in batch[0].items()
     }
 
@@ -43,7 +45,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
```

examples/multimodal_vision/mllama_example.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -30,7 +30,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+        ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
     ),
 ]
```

examples/multimodal_vision/pixtral_example.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -36,7 +36,7 @@ def data_collator(batch):
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
-        ignore=["re:.*lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
+        ignore=["re:.*lm_head", "re:.*vision_tower.*", "re:.*multi_modal_projector.*"],
     ),
 ]
```

examples/quantization_w4a4_fp4/llama4_example.py
Lines changed: 7 additions & 5 deletions

```diff
@@ -52,9 +52,11 @@ def preprocess_function(example):
 def data_collator(batch):
     assert len(batch) == 1
     return {
-        key: torch.tensor(value)
-        if key != "pixel_values"
-        else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        key: (
+            torch.tensor(value)
+            if key != "pixel_values"
+            else torch.tensor(value, dtype=torch.bfloat16).squeeze(0)
+        )
         for key, value in batch[0].items()
     }
 
@@ -67,8 +69,8 @@ def data_collator(batch):
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
```

examples/quantization_w8a8_fp8/llama3.2_vision_example.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_model.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.
```

examples/quantization_w8a8_fp8/llama4_fp8_block_example.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -22,8 +22,8 @@
         "re:.*lm_head",
         "re:.*self_attn",
         "re:.*router",
-        "re:vision_model.*",
-        "re:multi_modal_projector.*",
+        "re:.*vision_model.*",
+        "re:.*multi_modal_projector.*",
         "Llama4TextAttention",
     ],
 )
```

examples/quantization_w8a8_fp8/llava1.5_example.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -17,7 +17,7 @@
 recipe = QuantizationModifier(
     targets="Linear",
     scheme="FP8_DYNAMIC",
-    ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_tower.*"],
+    ignore=["re:.*lm_head", "re:.*multi_modal_projector.*", "re:.*vision_tower.*"],
 )
 
 # Apply quantization and save to disk in compressed-tensors format.
```
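
As a complement to scanning the logs as described in the test plan, one could assert programmatically after `oneshot` that none of the ignored modules were quantized. A hedged sketch: it assumes quantized modules are marked with a `quantization_scheme` attribute (a compressed-tensors convention, not confirmed by this diff) and that `model` is the in-memory model from one of the example scripts:

```python
# Run after an example script has applied its recipe to `model`.
ignored_markers = ("vision_tower", "vision_model", "multi_modal_projector")

offenders = [
    name
    for name, module in model.named_modules()
    if any(marker in name for marker in ignored_markers)
    and hasattr(module, "quantization_scheme")  # assumed quantization marker
]
assert not offenders, f"unexpectedly quantized: {offenders}"
```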
