146 changes: 146 additions & 0 deletions examples/llm_ptq/example_utils.py
@@ -13,9 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
import shutil
import sys
import warnings
from pathlib import Path
from typing import Any

import torch
@@ -24,6 +27,11 @@
from accelerate.utils import get_max_memory
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor, AutoTokenizer

try:
from huggingface_hub import snapshot_download
except ImportError:
snapshot_download = None

from modelopt.torch.utils.image_processor import MllamaImageProcessor

SPECULATIVE_MODEL_LIST = ["Eagle", "Medusa"]
@@ -263,3 +271,141 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str
quant_cfg["algorithm"] = "max"

return quant_cfg


def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False) -> str:
"""Resolve a model name or path to a local directory path.

If the input is already a local directory, returns it as-is.
If the input is a HuggingFace model ID, attempts to resolve it to the local cache path.

Args:
model_name_or_path: Either a local directory path or HuggingFace model ID
trust_remote_code: Whether to trust remote code when loading the model

Returns:
Local directory path to the model files
"""
# If it's already a local directory, return as-is
if os.path.isdir(model_name_or_path):
return model_name_or_path

# Try to resolve HuggingFace model ID to local cache path
try:
# First try to load the config to trigger caching
config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)

# The config object should have the local path information
# Try different ways to get the cached path
if hasattr(config, "_name_or_path") and os.path.isdir(config._name_or_path):
return config._name_or_path

# Alternative: use snapshot_download if available
if snapshot_download is not None:
try:
local_path = snapshot_download(
repo_id=model_name_or_path,
allow_patterns=["*.py", "*.json"], # Only download Python files and config
)
return local_path
except Exception as e:
print(f"Warning: Could not download model files using snapshot_download: {e}")

# Fallback: try to find in HuggingFace cache
from transformers.utils import TRANSFORMERS_CACHE

# Look for the model in the cache directory
cache_pattern = os.path.join(TRANSFORMERS_CACHE, "models--*")
cache_dirs = glob.glob(cache_pattern)

# Convert model name to cache directory format
model_cache_name = model_name_or_path.replace("/", "--")
for cache_dir in cache_dirs:
if model_cache_name in cache_dir:
# Look for the snapshots directory
snapshots_dir = os.path.join(cache_dir, "snapshots")
if os.path.exists(snapshots_dir):
# Get the latest snapshot
snapshot_dirs = [
d
for d in os.listdir(snapshots_dir)
if os.path.isdir(os.path.join(snapshots_dir, d))
]
if snapshot_dirs:
latest_snapshot = max(snapshot_dirs) # Use lexicographically latest
snapshot_path = os.path.join(snapshots_dir, latest_snapshot)
return snapshot_path

except Exception as e:
print(f"Warning: Could not resolve model path for {model_name_or_path}: {e}")

# If all else fails, return the original path
# This will cause the copy function to skip with a warning
return model_name_or_path


def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
"""Copy custom model files (configuration_*.py, modeling_*.py, *.json, etc.) from source to export directory.

This function copies custom Python files and JSON configuration files that are needed for
models with custom code. It excludes config.json and model.safetensors.index.json as these
are typically handled separately by the model export process.

Args:
source_path: Path to the original model directory or HuggingFace model ID
export_path: Path to the exported model directory
trust_remote_code: Whether trust_remote_code was used (only copy files if True)
"""
if not trust_remote_code:
return

# Resolve the source path (handles both local paths and HF model IDs)
resolved_source_path = _resolve_model_path(source_path, trust_remote_code)

source_dir = Path(resolved_source_path)
export_dir = Path(export_path)

if not source_dir.exists():
if resolved_source_path != source_path:
print(
f"Warning: Could not find local cache for HuggingFace model '{source_path}' "
f"(resolved to '{resolved_source_path}')"
)
else:
print(f"Warning: Source directory '{source_path}' does not exist")
return

if not export_dir.exists():
print(f"Warning: Export directory {export_path} does not exist")
return

# Common patterns for custom model files that need to be copied
custom_file_patterns = [
"configuration_*.py",
"modeling_*.py",
"tokenization_*.py",
"processing_*.py",
"image_processing_*.py",
"feature_extraction_*.py",
"*.json",
]

copied_files = []
for pattern in custom_file_patterns:
for file_path in source_dir.glob(pattern):
if file_path.is_file():
# Skip config.json and model.safetensors.index.json as they're handled separately
if file_path.name in ["config.json", "model.safetensors.index.json"]:
continue
dest_path = export_dir / file_path.name
try:
shutil.copy2(file_path, dest_path)
copied_files.append(file_path.name)
print(f"Copied custom model file: {file_path.name}")
except Exception as e:
print(f"Warning: Failed to copy {file_path.name}: {e}")

if copied_files:
print(f"Successfully copied {len(copied_files)} custom model files to {export_path}")
else:
print("No custom model files found to copy")
15 changes: 14 additions & 1 deletion examples/llm_ptq/hf_ptq.py
@@ -23,7 +23,14 @@
import numpy as np
import torch
from accelerate.hooks import remove_hook_from_module
from example_utils import apply_kv_cache_quant, get_model, get_processor, get_tokenizer, is_enc_dec
from example_utils import (
apply_kv_cache_quant,
copy_custom_model_files,
get_model,
get_processor,
get_tokenizer,
is_enc_dec,
)
from transformers import (
AutoConfig,
AutoModelForCausalLM,
@@ -604,6 +611,9 @@ def output_decode(generated_ids, input_shape):
inference_tensor_parallel=args.inference_tensor_parallel,
inference_pipeline_parallel=args.inference_pipeline_parallel,
)

# Copy custom model files (Python files and JSON configs) for TensorRT-LLM export
copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)
else:
# Check arguments for unified_hf export format and set to default if unsupported arguments are provided
assert args.sparsity_fmt == "dense", (
@@ -621,6 +631,9 @@
export_dir=export_path,
)

# Copy custom model files (Python files and JSON configs) if trust_remote_code is used
copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)

# Restore default padding and export the tokenizer as well.
if tokenizer is not None:
tokenizer.padding_side = default_padding_side
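
A standalone usage sketch of the new helper, mirroring the two call sites added above (the model ID and paths are hypothetical):

    from example_utils import copy_custom_model_files

    # Copies configuration_*.py, modeling_*.py, tokenization_*.py and extra *.json files
    # (excluding config.json and model.safetensors.index.json) next to the exported
    # checkpoint, but only when trust_remote_code is True; otherwise it is a no-op.
    copy_custom_model_files(
        "my-org/my-custom-model",  # local directory or HF model ID (hypothetical)
        "/tmp/exported_ckpt",      # export directory (hypothetical)
        trust_remote_code=True,
    )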