@@ -211,6 +211,9 @@ def main(args):
     random.seed(RAND_SEED)
     np.random.seed(RAND_SEED)
 
+    # Detect if this is a Nemotron VL model
+    is_nemotron_vl = "nemotron" in args.pyt_ckpt_path.lower() and "vl" in args.pyt_ckpt_path.lower()
+
     # launch a memory monitor to read the currently used GPU memory.
     launch_memory_monitor()
 
@@ -277,16 +280,6 @@ def main(args):
 
     model_type = get_model_type(model)
 
-    # Special handling for Nemotron VL models that aren't detected by standard model type detection
-    # For HF export, we want to keep vision unquantized, so we treat it as a regular language model
-    # and only quantize the language components
-    if model_type != "mllama" and is_multimodal_model(model):
-        print(
-            f"Detected multimodal model: {type(model).__name__}. "
-            f"For HF export, will quantize language components only, keeping vision unquantized."
-        )
-        # Keep as regular model type to use text-only calibration
-
     device = model.device
     if hasattr(model, "model"):
         device = model.model.device
@@ -471,9 +464,6 @@ def main(args):
         )
 
     # For Nemotron VL models, disable quantization of vision components
-    is_nemotron_vl = (
-        "nemotron" in args.pyt_ckpt_path.lower() and "vl" in args.pyt_ckpt_path.lower()
-    )
     if is_nemotron_vl:
         print("Disabling quantization for vision components in Nemotron VL model")
         quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
@@ -489,9 +479,6 @@ def main(args):
         ][0:1]
 
     # For Nemotron VL models, try text-only generation first, then VL generation as additional test
-    is_nemotron_vl = (
-        "nemotron" in args.pyt_ckpt_path.lower() and "vl" in args.pyt_ckpt_path.lower()
-    )
     if is_nemotron_vl:
         print("Running text-only preview generation for Nemotron VL model...")
         try:
0 commit comments