|
15 | 15 |
|
16 | 16 | """Code that exports quantized Hugging Face models for deployment.""" |
17 | 17 |
|
| 18 | +from builtins import ValueError |
18 | 19 | import collections.abc |
19 | 20 | import json |
20 | 21 | import re |
@@ -154,29 +155,23 @@ def _output_hook(module, input, output): |
154 | 155 | if getattr(model.config, "is_encoder_decoder", False): |
155 | 156 | # For encoder-decoder models, we need to pass both the encoder and decoder input ids |
156 | 157 | model(fake_input, decoder_input_ids=decoder_fake_input) |
157 | | - elif is_vl_model: |
158 | | - # For VL models, try to run optimization on just the language model part |
| 158 | + elif is_vl_model and "nemotron" in model_type: |
| 159 | + # For Nemotron VL models, try to run optimization on just the language model part |
159 | 160 | language_model, _ = get_language_model_from_vl(model) |
160 | | - if language_model is not None: |
161 | | - print( |
162 | | - "Found language_model component - running optimization on language model only" |
163 | | - ) |
164 | 161 |
|
165 | 162 | if language_model is not None: |
166 | 163 | # Run optimization on just the language model with the same input format as regular LLMs |
167 | 164 | # Use the same fake_input tensor that regular LLMs use |
168 | 165 | print( |
169 | 166 | f"Running optimization on language model with fake_input shape: {fake_input.shape}" |
170 | 167 | ) |
171 | | - try: |
172 | | - language_model(fake_input) |
173 | | - print("✅ Language model optimization completed successfully") |
174 | | - except Exception as e: |
175 | | - print(f"Language model optimization failed: {e}") |
176 | | - print("Continuing with export...") |
| 168 | + language_model(fake_input) |
177 | 169 | else: |
178 | | - print("Warning: No language_model found in VL model - skipping optimization") |
179 | | - print("This is unexpected for most VL models") |
| 170 | + raise ValueError( |
| 171 | + f"Cannot extract language_model from Nemotron VL model (type: {model_type}). " |
| 172 | + "This is required for requantization/resmoothing optimization. " |
| 173 | + "Please ensure the model architecture is supported or file an issue." |
| 174 | + ) |
180 | 175 | else: |
181 | 176 | model(fake_input) |
182 | 177 |
|
|
0 commit comments