Skip to content

Commit 0e00954

Browse files
committed
update
Signed-off-by: Zhiyu Cheng <[email protected]>
1 parent f9b88fd commit 0e00954

File tree

1 file changed

+9
-14
lines changed

1 file changed

+9
-14
lines changed

modelopt/torch/export/unified_export_hf.py

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
"""Code that export quantized Hugging Face models for deployment."""
1717

18+
from builtins import ValueError
1819
import collections.abc
1920
import json
2021
import re
@@ -154,29 +155,23 @@ def _output_hook(module, input, output):
154155
if getattr(model.config, "is_encoder_decoder", False):
155156
# For encoder-decoder models, we need to pass both the encoder and decoder input ids
156157
model(fake_input, decoder_input_ids=decoder_fake_input)
157-
elif is_vl_model:
158-
# For VL models, try to run optimization on just the language model part
158+
elif is_vl_model and "nemotron" in model_type:
159+
# For Nemotron VL models, try to run optimization on just the language model part
159160
language_model, _ = get_language_model_from_vl(model)
160-
if language_model is not None:
161-
print(
162-
"Found language_model component - running optimization on language model only"
163-
)
164161

165162
if language_model is not None:
166163
# Run optimization on just the language model with the same input format as regular LLMs
167164
# Use the same fake_input tensor that regular LLMs use
168165
print(
169166
f"Running optimization on language model with fake_input shape: {fake_input.shape}"
170167
)
171-
try:
172-
language_model(fake_input)
173-
print("✅ Language model optimization completed successfully")
174-
except Exception as e:
175-
print(f"Language model optimization failed: {e}")
176-
print("Continuing with export...")
168+
language_model(fake_input)
177169
else:
178-
print("Warning: No language_model found in VL model - skipping optimization")
179-
print("This is unexpected for most VL models")
170+
raise ValueError(
171+
f"Cannot extract language_model from Nemotron VL model (type: {model_type}). "
172+
"This is required for requantization/resmoothing optimization. "
173+
"Please ensure the model architecture is supported or file an issue."
174+
)
180175
else:
181176
model(fake_input)
182177

0 commit comments

Comments
 (0)