From 39ccfad50f6b1ab4156b2e75e566f31ec17044ae Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Sun, 21 Sep 2025 23:54:39 +0000 Subject: [PATCH 1/5] add a copy_custom_model_files function in hf_ptq to copy essential python files which cannot be exported with standard HF methods Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/hf_ptq.py | 58 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py index 3effb1d11..3e0c51d06 100755 --- a/examples/llm_ptq/hf_ptq.py +++ b/examples/llm_ptq/hf_ptq.py @@ -16,8 +16,10 @@ import argparse import copy import random +import shutil import time import warnings +from pathlib import Path from typing import Any import numpy as np @@ -83,6 +85,56 @@ mto.enable_huggingface_checkpointing() +def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False): + """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory. + + Args: + source_path: Path to the original model directory + export_path: Path to the exported model directory + trust_remote_code: Whether trust_remote_code was used (only copy files if True) + """ + if not trust_remote_code: + return + + source_dir = Path(source_path) + export_dir = Path(export_path) + + if not source_dir.exists(): + print(f"Warning: Source directory {source_path} does not exist") + return + + if not export_dir.exists(): + print(f"Warning: Export directory {export_path} does not exist") + return + + # Common patterns for custom model files that need to be copied + custom_file_patterns = [ + "configuration_*.py", + "modeling_*.py", + "tokenization_*.py", + "processing_*.py", + "image_processing_*.py", + "feature_extraction_*.py", + ] + + copied_files = [] + for pattern in custom_file_patterns: + for file_path in source_dir.glob(pattern): + if file_path.is_file(): + dest_path = export_dir / file_path.name + try: + shutil.copy2(file_path, dest_path) + copied_files.append(file_path.name) + print(f"Copied custom model file: {file_path.name}") + except Exception as e: + print(f"Warning: Failed to copy {file_path.name}: {e}") + + if copied_files: + print(f"Successfully copied {len(copied_files)} custom model files to {export_path}") + else: + print("No custom model files found to copy") + + def auto_quantize( model, qformat, auto_quantize_bits, calib_dataloader, calibrate_loop, batch_size=1 ): @@ -604,6 +656,9 @@ def output_decode(generated_ids, input_shape): inference_tensor_parallel=args.inference_tensor_parallel, inference_pipeline_parallel=args.inference_pipeline_parallel, ) + + # Copy custom model files for TensorRT-LLM export as well + copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code) else: # Check arguments for unified_hf export format and set to default if unsupported arguments are provided assert args.sparsity_fmt == "dense", ( @@ -621,6 +676,9 @@ def output_decode(generated_ids, input_shape): export_dir=export_path, ) + # Copy custom model files (configuration_*.py, modeling_*.py, etc.) if trust_remote_code is used + copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code) + # Restore default padding and export the tokenizer as well. if tokenizer is not None: tokenizer.padding_side = default_padding_side From 3b0f98a1c08d9d518a32ceded6797d18981eb214 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Wed, 24 Sep 2025 20:34:55 +0000 Subject: [PATCH 2/5] move customized copy function to example utils Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/example_utils.py | 52 ++++++++++++++++++++++++++ examples/llm_ptq/hf_ptq.py | 61 ++++--------------------------- 2 files changed, 60 insertions(+), 53 deletions(-) diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index 3ac167db2..c9ac0f2a4 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -14,8 +14,10 @@ # limitations under the License. import os +import shutil import sys import warnings +from pathlib import Path from typing import Any import torch @@ -263,3 +265,53 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str quant_cfg["algorithm"] = "max" return quant_cfg + + +def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False): + """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory. + + Args: + source_path: Path to the original model directory + export_path: Path to the exported model directory + trust_remote_code: Whether trust_remote_code was used (only copy files if True) + """ + if not trust_remote_code: + return + + source_dir = Path(source_path) + export_dir = Path(export_path) + + if not source_dir.exists(): + print(f"Warning: Source directory {source_path} does not exist") + return + + if not export_dir.exists(): + print(f"Warning: Export directory {export_path} does not exist") + return + + # Common patterns for custom model files that need to be copied + custom_file_patterns = [ + "configuration_*.py", + "modeling_*.py", + "tokenization_*.py", + "processing_*.py", + "image_processing_*.py", + "feature_extraction_*.py", + ] + + copied_files = [] + for pattern in custom_file_patterns: + for file_path in source_dir.glob(pattern): + if file_path.is_file(): + dest_path = export_dir / file_path.name + try: + shutil.copy2(file_path, dest_path) + copied_files.append(file_path.name) + print(f"Copied custom model file: {file_path.name}") + except Exception as e: + print(f"Warning: Failed to copy {file_path.name}: {e}") + + if copied_files: + print(f"Successfully copied {len(copied_files)} custom model files to {export_path}") + else: + print("No custom model files found to copy") diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py index 3e0c51d06..5aae77968 100755 --- a/examples/llm_ptq/hf_ptq.py +++ b/examples/llm_ptq/hf_ptq.py @@ -16,16 +16,21 @@ import argparse import copy import random -import shutil import time import warnings -from pathlib import Path from typing import Any import numpy as np import torch from accelerate.hooks import remove_hook_from_module -from example_utils import apply_kv_cache_quant, get_model, get_processor, get_tokenizer, is_enc_dec +from example_utils import ( + apply_kv_cache_quant, + copy_custom_model_files, + get_model, + get_processor, + get_tokenizer, + is_enc_dec, +) from transformers import ( AutoConfig, AutoModelForCausalLM, @@ -85,56 +90,6 @@ mto.enable_huggingface_checkpointing() -def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False): - """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory. - - Args: - source_path: Path to the original model directory - export_path: Path to the exported model directory - trust_remote_code: Whether trust_remote_code was used (only copy files if True) - """ - if not trust_remote_code: - return - - source_dir = Path(source_path) - export_dir = Path(export_path) - - if not source_dir.exists(): - print(f"Warning: Source directory {source_path} does not exist") - return - - if not export_dir.exists(): - print(f"Warning: Export directory {export_path} does not exist") - return - - # Common patterns for custom model files that need to be copied - custom_file_patterns = [ - "configuration_*.py", - "modeling_*.py", - "tokenization_*.py", - "processing_*.py", - "image_processing_*.py", - "feature_extraction_*.py", - ] - - copied_files = [] - for pattern in custom_file_patterns: - for file_path in source_dir.glob(pattern): - if file_path.is_file(): - dest_path = export_dir / file_path.name - try: - shutil.copy2(file_path, dest_path) - copied_files.append(file_path.name) - print(f"Copied custom model file: {file_path.name}") - except Exception as e: - print(f"Warning: Failed to copy {file_path.name}: {e}") - - if copied_files: - print(f"Successfully copied {len(copied_files)} custom model files to {export_path}") - else: - print("No custom model files found to copy") - - def auto_quantize( model, qformat, auto_quantize_bits, calib_dataloader, calibrate_loop, batch_size=1 ): From 4aa4285f1558d9c82c949fc1f6908c7a2aeda877 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 25 Sep 2025 00:41:57 +0000 Subject: [PATCH 3/5] add a util function to resolve model path Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/example_utils.py | 91 ++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index c9ac0f2a4..cfbe6a90c 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import glob import os import shutil import sys @@ -26,6 +27,11 @@ from accelerate.utils import get_max_memory from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor, AutoTokenizer +try: + from huggingface_hub import snapshot_download +except ImportError: + snapshot_download = None + from modelopt.torch.utils.image_processor import MllamaImageProcessor SPECULATIVE_MODEL_LIST = ["Eagle", "Medusa"] @@ -267,22 +273,101 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str return quant_cfg +def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False) -> str: + """Resolve a model name or path to a local directory path. + + If the input is already a local directory, returns it as-is. + If the input is a HuggingFace model ID, attempts to resolve it to the local cache path. + + Args: + model_name_or_path: Either a local directory path or HuggingFace model ID + trust_remote_code: Whether to trust remote code when loading the model + + Returns: + Local directory path to the model files + """ + # If it's already a local directory, return as-is + if os.path.isdir(model_name_or_path): + return model_name_or_path + + # Try to resolve HuggingFace model ID to local cache path + try: + # First try to load the config to trigger caching + config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code) + + # The config object should have the local path information + # Try different ways to get the cached path + if hasattr(config, "_name_or_path") and os.path.isdir(config._name_or_path): + return config._name_or_path + + # Alternative: use snapshot_download if available + if snapshot_download is not None: + try: + local_path = snapshot_download( + repo_id=model_name_or_path, + allow_patterns=["*.py", "*.json"], # Only download Python files and config + ) + return local_path + except Exception as e: + print(f"Warning: Could not download model files using snapshot_download: {e}") + + # Fallback: try to find in HuggingFace cache + from transformers.utils import TRANSFORMERS_CACHE + + # Look for the model in the cache directory + cache_pattern = os.path.join(TRANSFORMERS_CACHE, "models--*") + cache_dirs = glob.glob(cache_pattern) + + # Convert model name to cache directory format + model_cache_name = model_name_or_path.replace("/", "--") + for cache_dir in cache_dirs: + if model_cache_name in cache_dir: + # Look for the snapshots directory + snapshots_dir = os.path.join(cache_dir, "snapshots") + if os.path.exists(snapshots_dir): + # Get the latest snapshot + snapshot_dirs = [ + d + for d in os.listdir(snapshots_dir) + if os.path.isdir(os.path.join(snapshots_dir, d)) + ] + if snapshot_dirs: + latest_snapshot = max(snapshot_dirs) # Use lexicographically latest + snapshot_path = os.path.join(snapshots_dir, latest_snapshot) + return snapshot_path + + except Exception as e: + print(f"Warning: Could not resolve model path for {model_name_or_path}: {e}") + + # If all else fails, return the original path + # This will cause the copy function to skip with a warning + return model_name_or_path + + def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False): """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory. Args: - source_path: Path to the original model directory + source_path: Path to the original model directory or HuggingFace model ID export_path: Path to the exported model directory trust_remote_code: Whether trust_remote_code was used (only copy files if True) """ if not trust_remote_code: return - source_dir = Path(source_path) + # Resolve the source path (handles both local paths and HF model IDs) + resolved_source_path = _resolve_model_path(source_path, trust_remote_code) + + source_dir = Path(resolved_source_path) export_dir = Path(export_path) if not source_dir.exists(): - print(f"Warning: Source directory {source_path} does not exist") + if resolved_source_path != source_path: + print( + f"Warning: Could not find local cache for HuggingFace model '{source_path}' (resolved to '{resolved_source_path}')" + ) + else: + print(f"Warning: Source directory '{source_path}' does not exist") return if not export_dir.exists(): From 9d305fab22a51988e511cd00305062291c826afd Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 25 Sep 2025 00:43:38 +0000 Subject: [PATCH 4/5] add a util function to resolve model path Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/example_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index cfbe6a90c..6a541e32a 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -364,7 +364,8 @@ def copy_custom_model_files(source_path: str, export_path: str, trust_remote_cod if not source_dir.exists(): if resolved_source_path != source_path: print( - f"Warning: Could not find local cache for HuggingFace model '{source_path}' (resolved to '{resolved_source_path}')" + f"Warning: Could not find local cache for HuggingFace model '{source_path}' " + f"(resolved to '{resolved_source_path}')" ) else: print(f"Warning: Source directory '{source_path}' does not exist") From 21fa47f5c44a66512e082845903e62c1d12cd47b Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Thu, 25 Sep 2025 18:41:02 +0000 Subject: [PATCH 5/5] copy over json files in export path as well Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/example_utils.py | 10 +++++++++- examples/llm_ptq/hf_ptq.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index 6a541e32a..ea08ff699 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -345,7 +345,11 @@ def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False): - """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory. + """Copy custom model files (configuration_*.py, modeling_*.py, *.json, etc.) from source to export directory. + + This function copies custom Python files and JSON configuration files that are needed for + models with custom code. It excludes config.json and model.safetensors.index.json as these + are typically handled separately by the model export process. Args: source_path: Path to the original model directory or HuggingFace model ID @@ -383,12 +387,16 @@ def copy_custom_model_files(source_path: str, export_path: str, trust_remote_cod "processing_*.py", "image_processing_*.py", "feature_extraction_*.py", + "*.json", ] copied_files = [] for pattern in custom_file_patterns: for file_path in source_dir.glob(pattern): if file_path.is_file(): + # Skip config.json and model.safetensors.index.json as they're handled separately + if file_path.name in ["config.json", "model.safetensors.index.json"]: + continue dest_path = export_dir / file_path.name try: shutil.copy2(file_path, dest_path) diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py index 5aae77968..3ef40d4b3 100755 --- a/examples/llm_ptq/hf_ptq.py +++ b/examples/llm_ptq/hf_ptq.py @@ -612,7 +612,7 @@ def output_decode(generated_ids, input_shape): inference_pipeline_parallel=args.inference_pipeline_parallel, ) - # Copy custom model files for TensorRT-LLM export as well + # Copy custom model files (Python files and JSON configs) for TensorRT-LLM export copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code) else: # Check arguments for unified_hf export format and set to default if unsupported arguments are provided @@ -631,7 +631,7 @@ def output_decode(generated_ids, input_shape): export_dir=export_path, ) - # Copy custom model files (configuration_*.py, modeling_*.py, etc.) if trust_remote_code is used + # Copy custom model files (Python files and JSON configs) if trust_remote_code is used copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code) # Restore default padding and export the tokenizer as well.