Commit 0e35d14

added fixes for nvbug 5710649
Signed-off-by: Suguna Velury <[email protected]>
1 parent 0f12136 commit 0e35d14

File tree

examples/llm_ptq/multinode_ptq.py
modelopt/torch/export/unified_export_hf.py

2 files changed: +32 -3 lines changed

examples/llm_ptq/multinode_ptq.py

Lines changed: 30 additions & 1 deletion
@@ -30,12 +30,19 @@
 from accelerate import Accelerator
 from example_utils import build_quant_cfg, get_tokenizer
 from tqdm import tqdm
-from transformers import AutoModelForCausalLM, PreTrainedTokenizer, PreTrainedTokenizerFast
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoProcessor,
+    PreTrainedTokenizer,
+    PreTrainedTokenizerFast,
+)
 
 import modelopt.torch.opt as mto
 import modelopt.torch.quantization as mtq
 from modelopt.torch.export import get_model_type
 from modelopt.torch.export.convert_hf_config import convert_hf_quant_config_format
+from modelopt.torch.export.model_utils import is_multimodal_model
 from modelopt.torch.export.unified_export_hf import _export_hf_checkpoint
 from modelopt.torch.quantization.config import need_calibration
 from modelopt.torch.quantization.utils import patch_fsdp_mp_dtypes
@@ -243,6 +250,28 @@ def export_model(
     export_dir = Path(export_path)
     export_dir.mkdir(parents=True, exist_ok=True)
 
+    # Check if the model is a multimodal/VLM model
+    is_vlm = is_multimodal_model(model)
+
+    if is_vlm:
+        # Save original model config and the processor config to the export path for VLMs.
+        print(f"Saving original model config to {export_path}")
+
+        config_kwargs = {"trust_remote_code": args.trust_remote_code}
+        if args.attn_implementation is not None:
+            config_kwargs["attn_implementation"] = args.attn_implementation
+        AutoConfig.from_pretrained(args.pyt_ckpt_path, **config_kwargs).save_pretrained(export_path)
+
+        # Try to save processor config if available
+        try:
+            print(f"Saving processor config to {export_path}")
+            AutoProcessor.from_pretrained(
+                args.pyt_ckpt_path, trust_remote_code=args.trust_remote_code
+            ).save_pretrained(export_path)
+        except Exception as e:
+            print(f"Warning: Could not save processor config: {e}")
+            print("This is normal for some VLM architectures that don't use AutoProcessor")
+
     post_state_dict, hf_quant_config = _export_hf_checkpoint(
         model, torch.bfloat16, accelerator=accelerator
     )
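
The VLM branch above is gated by the new is_multimodal_model helper imported from modelopt.torch.export.model_utils. As a rough, hypothetical sketch of how such a check can work (the real helper may use entirely different criteria), a VLM is often recognizable from vision-specific fields on its Hugging Face config:

    # Hypothetical sketch only; not the actual modelopt implementation.
    def is_multimodal_model_sketch(model) -> bool:
        config = getattr(model, "config", None)
        if config is None:
            return False
        # VLM configs commonly nest a vision config or expose image-token fields.
        return any(
            hasattr(config, attr)
            for attr in ("vision_config", "vision_tower", "image_token_index")
        )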

modelopt/torch/export/unified_export_hf.py

Lines changed: 2 additions & 2 deletions
@@ -536,8 +536,8 @@ def _export_hf_checkpoint(
             quantizer_attrs=["gate_up_proj_input_quantizer", "down_proj_input_quantizer"],
         )
         # Export the quantized weights
-        for weight_name in ["gate_up_proj", "down_proj"]:
-            with fsdp2_aware_weight_update(model, sub_module, reshard=False):
+        with fsdp2_aware_weight_update(model, sub_module, reshard=False):
+            for weight_name in ["gate_up_proj", "down_proj"]:
                 _export_quantized_weight(sub_module, dtype, weight_name)
 
         if accelerator is not None:
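
The reordering matters because fsdp2_aware_weight_update is a context manager: the old code entered it once per weight, while the new code enters it once and exports both weights under the same managed view. A minimal runnable sketch of the fixed shape (names here are illustrative stand-ins, assuming the context gathers full weights on entry and redistributes them on exit; the real fsdp2_aware_weight_update differs):

    from contextlib import contextmanager

    @contextmanager
    def weight_update_sketch(module_name: str):
        print(f"gather {module_name}")  # stand-in for unsharding FSDP2 weights
        try:
            yield
        finally:
            print(f"reshard {module_name}")  # stand-in for redistributing shards

    def export_weight_sketch(module_name: str, weight_name: str):
        print(f"export {module_name}.{weight_name}")

    # After the fix: one gather/reshard cycle covers both weights,
    # instead of one full cycle per weight.
    with weight_update_sketch("mlp"):
        for weight_name in ["gate_up_proj", "down_proj"]:
            export_weight_sketch("mlp", weight_name)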
