From 39ccfad50f6b1ab4156b2e75e566f31ec17044ae Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng <zhiyuc@nvidia.com>
Date: Sun, 21 Sep 2025 23:54:39 +0000
Subject: [PATCH 1/5] add a copy_custom_model_files function in hf_ptq to copy
 essential python files which cannot be exported with standard HF methods

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
---
 examples/llm_ptq/hf_ptq.py | 58 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
index 3effb1d11..3e0c51d06 100755
--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
@@ -16,8 +16,10 @@
 import argparse
 import copy
 import random
+import shutil
 import time
 import warnings
+from pathlib import Path
 from typing import Any
 
 import numpy as np
@@ -83,6 +85,56 @@
 mto.enable_huggingface_checkpointing()
 
 
+def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
+    """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory.
+
+    Args:
+        source_path: Path to the original model directory
+        export_path: Path to the exported model directory
+        trust_remote_code: Whether trust_remote_code was used (only copy files if True)
+    """
+    if not trust_remote_code:
+        return
+
+    source_dir = Path(source_path)
+    export_dir = Path(export_path)
+
+    if not source_dir.exists():
+        print(f"Warning: Source directory {source_path} does not exist")
+        return
+
+    if not export_dir.exists():
+        print(f"Warning: Export directory {export_path} does not exist")
+        return
+
+    # Common patterns for custom model files that need to be copied
+    custom_file_patterns = [
+        "configuration_*.py",
+        "modeling_*.py",
+        "tokenization_*.py",
+        "processing_*.py",
+        "image_processing_*.py",
+        "feature_extraction_*.py",
+    ]
+
+    copied_files = []
+    for pattern in custom_file_patterns:
+        for file_path in source_dir.glob(pattern):
+            if file_path.is_file():
+                dest_path = export_dir / file_path.name
+                try:
+                    shutil.copy2(file_path, dest_path)
+                    copied_files.append(file_path.name)
+                    print(f"Copied custom model file: {file_path.name}")
+                except Exception as e:
+                    print(f"Warning: Failed to copy {file_path.name}: {e}")
+
+    if copied_files:
+        print(f"Successfully copied {len(copied_files)} custom model files to {export_path}")
+    else:
+        print("No custom model files found to copy")
+
+
 def auto_quantize(
     model, qformat, auto_quantize_bits, calib_dataloader, calibrate_loop, batch_size=1
 ):
@@ -604,6 +656,9 @@ def output_decode(generated_ids, input_shape):
                 inference_tensor_parallel=args.inference_tensor_parallel,
                 inference_pipeline_parallel=args.inference_pipeline_parallel,
             )
+
+            # Copy custom model files for TensorRT-LLM export as well
+            copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)
         else:
             # Check arguments for unified_hf export format and set to default if unsupported arguments are provided
             assert args.sparsity_fmt == "dense", (
@@ -621,6 +676,9 @@ def output_decode(generated_ids, input_shape):
                 export_dir=export_path,
             )
 
+        # Copy custom model files (configuration_*.py, modeling_*.py, etc.) if trust_remote_code is used
+        copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)
+
         # Restore default padding and export the tokenizer as well.
         if tokenizer is not None:
             tokenizer.padding_side = default_padding_side

From 3b0f98a1c08d9d518a32ceded6797d18981eb214 Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng <zhiyuc@nvidia.com>
Date: Wed, 24 Sep 2025 20:34:55 +0000
Subject: [PATCH 2/5] move customized copy function to example utils

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
---
 examples/llm_ptq/example_utils.py | 52 ++++++++++++++++++++++++++
 examples/llm_ptq/hf_ptq.py        | 61 ++++---------------------------
 2 files changed, 60 insertions(+), 53 deletions(-)

diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index 3ac167db2..c9ac0f2a4 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -14,8 +14,10 @@
 # limitations under the License.
 
 import os
+import shutil
 import sys
 import warnings
+from pathlib import Path
 from typing import Any
 
 import torch
@@ -263,3 +265,53 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str
         quant_cfg["algorithm"] = "max"
 
     return quant_cfg
+
+
+def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
+    """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory.
+
+    Args:
+        source_path: Path to the original model directory
+        export_path: Path to the exported model directory
+        trust_remote_code: Whether trust_remote_code was used (only copy files if True)
+    """
+    if not trust_remote_code:
+        return
+
+    source_dir = Path(source_path)
+    export_dir = Path(export_path)
+
+    if not source_dir.exists():
+        print(f"Warning: Source directory {source_path} does not exist")
+        return
+
+    if not export_dir.exists():
+        print(f"Warning: Export directory {export_path} does not exist")
+        return
+
+    # Common patterns for custom model files that need to be copied
+    custom_file_patterns = [
+        "configuration_*.py",
+        "modeling_*.py",
+        "tokenization_*.py",
+        "processing_*.py",
+        "image_processing_*.py",
+        "feature_extraction_*.py",
+    ]
+
+    copied_files = []
+    for pattern in custom_file_patterns:
+        for file_path in source_dir.glob(pattern):
+            if file_path.is_file():
+                dest_path = export_dir / file_path.name
+                try:
+                    shutil.copy2(file_path, dest_path)
+                    copied_files.append(file_path.name)
+                    print(f"Copied custom model file: {file_path.name}")
+                except Exception as e:
+                    print(f"Warning: Failed to copy {file_path.name}: {e}")
+
+    if copied_files:
+        print(f"Successfully copied {len(copied_files)} custom model files to {export_path}")
+    else:
+        print("No custom model files found to copy")
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
index 3e0c51d06..5aae77968 100755
--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
@@ -16,16 +16,21 @@
 import argparse
 import copy
 import random
-import shutil
 import time
 import warnings
-from pathlib import Path
 from typing import Any
 
 import numpy as np
 import torch
 from accelerate.hooks import remove_hook_from_module
-from example_utils import apply_kv_cache_quant, get_model, get_processor, get_tokenizer, is_enc_dec
+from example_utils import (
+    apply_kv_cache_quant,
+    copy_custom_model_files,
+    get_model,
+    get_processor,
+    get_tokenizer,
+    is_enc_dec,
+)
 from transformers import (
     AutoConfig,
     AutoModelForCausalLM,
@@ -85,56 +90,6 @@
 mto.enable_huggingface_checkpointing()
 
 
-def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
-    """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory.
-
-    Args:
-        source_path: Path to the original model directory
-        export_path: Path to the exported model directory
-        trust_remote_code: Whether trust_remote_code was used (only copy files if True)
-    """
-    if not trust_remote_code:
-        return
-
-    source_dir = Path(source_path)
-    export_dir = Path(export_path)
-
-    if not source_dir.exists():
-        print(f"Warning: Source directory {source_path} does not exist")
-        return
-
-    if not export_dir.exists():
-        print(f"Warning: Export directory {export_path} does not exist")
-        return
-
-    # Common patterns for custom model files that need to be copied
-    custom_file_patterns = [
-        "configuration_*.py",
-        "modeling_*.py",
-        "tokenization_*.py",
-        "processing_*.py",
-        "image_processing_*.py",
-        "feature_extraction_*.py",
-    ]
-
-    copied_files = []
-    for pattern in custom_file_patterns:
-        for file_path in source_dir.glob(pattern):
-            if file_path.is_file():
-                dest_path = export_dir / file_path.name
-                try:
-                    shutil.copy2(file_path, dest_path)
-                    copied_files.append(file_path.name)
-                    print(f"Copied custom model file: {file_path.name}")
-                except Exception as e:
-                    print(f"Warning: Failed to copy {file_path.name}: {e}")
-
-    if copied_files:
-        print(f"Successfully copied {len(copied_files)} custom model files to {export_path}")
-    else:
-        print("No custom model files found to copy")
-
-
 def auto_quantize(
     model, qformat, auto_quantize_bits, calib_dataloader, calibrate_loop, batch_size=1
 ):

From 4aa4285f1558d9c82c949fc1f6908c7a2aeda877 Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng <zhiyuc@nvidia.com>
Date: Thu, 25 Sep 2025 00:41:57 +0000
Subject: [PATCH 3/5] add a util function to resolve model path

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
---
 examples/llm_ptq/example_utils.py | 91 ++++++++++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 3 deletions(-)

diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index c9ac0f2a4..cfbe6a90c 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import glob
 import os
 import shutil
 import sys
@@ -26,6 +27,11 @@
 from accelerate.utils import get_max_memory
 from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor, AutoTokenizer
 
+try:
+    from huggingface_hub import snapshot_download
+except ImportError:
+    snapshot_download = None
+
 from modelopt.torch.utils.image_processor import MllamaImageProcessor
 
 SPECULATIVE_MODEL_LIST = ["Eagle", "Medusa"]
@@ -267,22 +273,101 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str
     return quant_cfg
 
 
+def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False) -> str:
+    """Resolve a model name or path to a local directory path.
+
+    A local directory is returned as-is. Otherwise the input is treated as a HuggingFace model ID
+    and tried, in order, via AutoConfig, snapshot_download (if importable) and a cache-dir scan.
+
+    Args:
+        model_name_or_path: Either a local directory path or HuggingFace model ID
+        trust_remote_code: Forwarded to AutoConfig.from_pretrained when loading the config
+
+    Returns:
+        A local directory path, or model_name_or_path unchanged if no step yields one
+    """
+    # If it's already a local directory, return as-is
+    if os.path.isdir(model_name_or_path):
+        return model_name_or_path
+
+    # Try to resolve HuggingFace model ID to local cache path
+    try:
+        # First try to load the config to trigger caching
+        config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
+
+        # Use config._name_or_path only when it names an existing directory
+        # NOTE(review): for a hub model ID it may just echo the ID, not a cache path - confirm
+        if hasattr(config, "_name_or_path") and os.path.isdir(config._name_or_path):
+            return config._name_or_path
+
+        # Alternative: use snapshot_download if available
+        if snapshot_download is not None:
+            try:
+                local_path = snapshot_download(
+                    repo_id=model_name_or_path,
+                    allow_patterns=["*.py", "*.json"],  # Only download *.py and *.json files
+                )
+                return local_path
+            except Exception as e:
+                print(f"Warning: Could not download model files using snapshot_download: {e}")
+
+        # Fallback: try to find in HuggingFace cache
+        from transformers.utils import TRANSFORMERS_CACHE
+
+        # Look for the model in the cache directory
+        cache_pattern = os.path.join(TRANSFORMERS_CACHE, "models--*")
+        cache_dirs = glob.glob(cache_pattern)
+
+        # Convert model name to cache directory format
+        model_cache_name = model_name_or_path.replace("/", "--")
+        for cache_dir in cache_dirs:
+            if model_cache_name in cache_dir:  # substring test: longer repo names also match
+                # Look for the snapshots directory
+                snapshots_dir = os.path.join(cache_dir, "snapshots")
+                if os.path.exists(snapshots_dir):
+                    # Pick one snapshot sub-directory by name
+                    snapshot_dirs = [
+                        d
+                        for d in os.listdir(snapshots_dir)
+                        if os.path.isdir(os.path.join(snapshots_dir, d))
+                    ]
+                    if snapshot_dirs:
+                        latest_snapshot = max(snapshot_dirs)  # max by name string, not by age
+                        snapshot_path = os.path.join(snapshots_dir, latest_snapshot)
+                        return snapshot_path
+
+    except Exception as e:
+        print(f"Warning: Could not resolve model path for {model_name_or_path}: {e}")
+
+    # If all else fails, return the original path
+    # This will cause the copy function to skip with a warning
+    return model_name_or_path
+
+
 def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
     """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory.
 
     Args:
-        source_path: Path to the original model directory
+        source_path: Path to the original model directory or HuggingFace model ID
         export_path: Path to the exported model directory
         trust_remote_code: Whether trust_remote_code was used (only copy files if True)
     """
     if not trust_remote_code:
         return
 
-    source_dir = Path(source_path)
+    # Resolve the source path (handles both local paths and HF model IDs)
+    resolved_source_path = _resolve_model_path(source_path, trust_remote_code)
+
+    source_dir = Path(resolved_source_path)
     export_dir = Path(export_path)
 
     if not source_dir.exists():
-        print(f"Warning: Source directory {source_path} does not exist")
+        if resolved_source_path != source_path:
+            print(
+                f"Warning: Could not find local cache for HuggingFace model '{source_path}' (resolved to '{resolved_source_path}')"
+            )
+        else:
+            print(f"Warning: Source directory '{source_path}' does not exist")
         return
 
     if not export_dir.exists():

From 9d305fab22a51988e511cd00305062291c826afd Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng <zhiyuc@nvidia.com>
Date: Thu, 25 Sep 2025 00:43:38 +0000
Subject: [PATCH 4/5] wrap over-long warning message in copy_custom_model_files

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
---
 examples/llm_ptq/example_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index cfbe6a90c..6a541e32a 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -364,7 +364,8 @@ def copy_custom_model_files(source_path: str, export_path: str, trust_remote_cod
     if not source_dir.exists():
         if resolved_source_path != source_path:
             print(
-                f"Warning: Could not find local cache for HuggingFace model '{source_path}' (resolved to '{resolved_source_path}')"
+                f"Warning: Could not find local cache for HuggingFace model '{source_path}' "
+                f"(resolved to '{resolved_source_path}')"
             )
         else:
             print(f"Warning: Source directory '{source_path}' does not exist")

From 21fa47f5c44a66512e082845903e62c1d12cd47b Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng <zhiyuc@nvidia.com>
Date: Thu, 25 Sep 2025 18:41:02 +0000
Subject: [PATCH 5/5] copy over json files to export path as well

Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
---
 examples/llm_ptq/example_utils.py | 10 +++++++++-
 examples/llm_ptq/hf_ptq.py        |  4 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index 6a541e32a..ea08ff699 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -345,7 +345,11 @@ def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False
 
 
 def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
-    """Copy custom model files (configuration_*.py, modeling_*.py, etc.) from source to export directory.
+    """Copy custom model files (configuration_*.py, modeling_*.py, *.json, etc.) from source to export directory.
+
+    This function copies custom Python files and JSON configuration files that are needed for
+    models with custom code. It excludes config.json and model.safetensors.index.json as these
+    are typically handled separately by the model export process.
 
     Args:
         source_path: Path to the original model directory or HuggingFace model ID
@@ -383,12 +387,16 @@ def copy_custom_model_files(source_path: str, export_path: str, trust_remote_cod
         "processing_*.py",
         "image_processing_*.py",
         "feature_extraction_*.py",
+        "*.json",
     ]
 
     copied_files = []
     for pattern in custom_file_patterns:
         for file_path in source_dir.glob(pattern):
             if file_path.is_file():
+                # Skip config.json and model.safetensors.index.json as they're handled separately
+                if file_path.name in ["config.json", "model.safetensors.index.json"]:
+                    continue
                 dest_path = export_dir / file_path.name
                 try:
                     shutil.copy2(file_path, dest_path)
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
index 5aae77968..3ef40d4b3 100755
--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
@@ -612,7 +612,7 @@ def output_decode(generated_ids, input_shape):
                 inference_pipeline_parallel=args.inference_pipeline_parallel,
             )
 
-            # Copy custom model files for TensorRT-LLM export as well
+            # Copy custom model files (Python files and JSON configs) for TensorRT-LLM export
             copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)
         else:
             # Check arguments for unified_hf export format and set to default if unsupported arguments are provided
@@ -631,7 +631,7 @@ def output_decode(generated_ids, input_shape):
                 export_dir=export_path,
             )
 
-        # Copy custom model files (configuration_*.py, modeling_*.py, etc.) if trust_remote_code is used
+        # Copy custom model files (Python files and JSON configs) if trust_remote_code is used
         copy_custom_model_files(args.pyt_ckpt_path, export_path, args.trust_remote_code)
 
         # Restore default padding and export the tokenizer as well.