67 changes: 8 additions & 59 deletions examples/llm_ptq/example_utils.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
import shutil
import sys
@@ -33,6 +32,7 @@
snapshot_download = None

from modelopt.torch.utils.image_processor import MllamaImageProcessor
from modelopt.torch.utils.model_path_utils import resolve_model_path

SPECULATIVE_MODEL_LIST = ["Eagle", "Medusa"]

@@ -259,8 +259,7 @@ def apply_kv_cache_quant(quant_cfg: dict[str, Any], kv_cache_quant_cfg: dict[str
def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False) -> str:
"""Resolve a model name or path to a local directory path.

If the input is already a local directory, returns it as-is.
If the input is a HuggingFace model ID, attempts to resolve it to the local cache path.
This function is now a wrapper around the unified resolve_model_path utility.

Args:
model_name_or_path: Either a local directory path or HuggingFace model ID
@@ -269,62 +268,12 @@ def _resolve_model_path(model_name_or_path: str, trust_remote_code: bool = False
Returns:
Local directory path to the model files
"""
# If it's already a local directory, return as-is
if os.path.isdir(model_name_or_path):
return model_name_or_path

# Try to resolve HuggingFace model ID to local cache path
try:
# First try to load the config to trigger caching
config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)

# The config object should have the local path information
# Try different ways to get the cached path
if hasattr(config, "_name_or_path") and os.path.isdir(config._name_or_path):
return config._name_or_path

# Alternative: use snapshot_download if available
if snapshot_download is not None:
try:
local_path = snapshot_download(
repo_id=model_name_or_path,
allow_patterns=["*.py", "*.json"], # Only download Python files and config
)
return local_path
except Exception as e:
print(f"Warning: Could not download model files using snapshot_download: {e}")

# Fallback: try to find in HuggingFace cache
from transformers.utils import TRANSFORMERS_CACHE

# Look for the model in the cache directory
cache_pattern = os.path.join(TRANSFORMERS_CACHE, "models--*")
cache_dirs = glob.glob(cache_pattern)

# Convert model name to cache directory format
model_cache_name = model_name_or_path.replace("/", "--")
for cache_dir in cache_dirs:
if model_cache_name in cache_dir:
# Look for the snapshots directory
snapshots_dir = os.path.join(cache_dir, "snapshots")
if os.path.exists(snapshots_dir):
# Get the latest snapshot
snapshot_dirs = [
d
for d in os.listdir(snapshots_dir)
if os.path.isdir(os.path.join(snapshots_dir, d))
]
if snapshot_dirs:
latest_snapshot = max(snapshot_dirs) # Use lexicographically latest
snapshot_path = os.path.join(snapshots_dir, latest_snapshot)
return snapshot_path

except Exception as e:
print(f"Warning: Could not resolve model path for {model_name_or_path}: {e}")

# If all else fails, return the original path
# This will cause the copy function to skip with a warning
return model_name_or_path
return resolve_model_path(
model_name_or_path,
trust_remote_code=trust_remote_code,
download_files=True,
allow_patterns=["*.py", "*.json"], # Only download Python files and config
)


def copy_custom_model_files(source_path: str, export_path: str, trust_remote_code: bool = False):
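The slimmed-down wrapper above delegates everything to resolve_model_path from modelopt.torch.utils.model_path_utils. A minimal usage sketch, assuming only the signature and keyword arguments visible in this hunk; the Hub ID below is a placeholder, and the "local directory comes back unchanged" behavior is inferred from the removed docstring:

# Sketch only: exercises the unified resolver with a local path and a Hub-style ID.
from modelopt.torch.utils.model_path_utils import resolve_model_path

# A local directory is expected to be returned as-is (per the old docstring above).
local_dir = resolve_model_path("/path/to/local/model", trust_remote_code=False)

# "org/some-model" is illustrative, not a real checkpoint.
cached_dir = resolve_model_path(
    "org/some-model",
    trust_remote_code=True,
    download_files=True,                # mirrors the call in _resolve_model_path
    allow_patterns=["*.py", "*.json"],  # only Python files and config, as above
)
print(local_dir, cached_dir)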
3 changes: 3 additions & 0 deletions examples/llm_ptq/hf_ptq.py
@@ -271,6 +271,9 @@ def main(args):
**model_kwargs,
)
calibration_only = True

# Store original model path for config restoration
model._original_model_path = args.pyt_ckpt_path
model_is_already_quantized = is_quantized(model)

model_type = get_model_type(model)
74 changes: 74 additions & 0 deletions modelopt/torch/export/model_config_utils.py
@@ -16,13 +16,17 @@
"""Common utils for the ModelConfig."""

import dataclasses
import json
import math
import warnings
from pathlib import Path
from types import UnionType
from typing import Union, get_args, get_origin

import numpy as np
import torch

from ..utils.model_path_utils import fetch_model_config, is_huggingface_model_id
from .model_config import (
QUANTIZATION_FP8_PC_PT,
QUANTIZATION_INT4_AWQ,
@@ -227,6 +231,76 @@ def model_config_from_dict(d: dict) -> ModelConfig:
return _from_dict(config_type, d)


def restore_original_rope_scaling(config_data: dict, original_model_path: str) -> dict:
"""Restore original rope_scaling configuration if it was modified by transformers.

Some VLM models like Qwen2.5-VL have their rope_scaling configuration modified
by the transformers library during loading (e.g., from "mrope" to "default" with
additional fields). This function restores the original configuration.

Args:
config_data: The model configuration dictionary to restore
original_model_path: Path to the original model directory or HuggingFace Hub model ID
(e.g., "microsoft/DialoGPT-medium" or "/path/to/local/model")

Returns:
The config_data dictionary with restored rope_scaling (modified in-place)

Note:
This function automatically detects whether original_model_path is a local filesystem
path or a HuggingFace Hub model ID. For Hub model IDs, it will fetch the config.json
directly from the Hub. Requires huggingface_hub package for Hub model ID support.
"""
try:
Collaborator:

can we just do a copy of the config.json and keep it the same as before?

Contributor Author:

I prefer the current solution; we can't keep it the same as before because we need to add the quantization config into config.json.

raw_original_config = None

# Check if original_model_path is a HuggingFace Hub model ID or local path
if is_huggingface_model_id(original_model_path):
# Try to fetch config from HuggingFace Hub
raw_original_config = fetch_model_config(original_model_path)
else:
# Handle as local filesystem path
original_config_file = Path(original_model_path) / "config.json"
if original_config_file.exists():
with open(original_config_file) as f:
raw_original_config = json.load(f)

# If we successfully got the original config, proceed with restoration
if raw_original_config is not None:
# Check if rope_scaling was modified from mrope to default
if (
"rope_scaling" in raw_original_config
and "rope_scaling" in config_data
and raw_original_config["rope_scaling"].get("type") == "mrope"
and config_data["rope_scaling"].get("type") == "default"
and "rope_type" in config_data["rope_scaling"]
):
print(f"Restoring original rope_scaling configuration from {original_model_path}")
config_data["rope_scaling"] = raw_original_config["rope_scaling"]

# Also restore rope_scaling in text_config if it exists
if (
"text_config" in config_data
and "rope_scaling" in config_data["text_config"]
and config_data["text_config"]["rope_scaling"].get("type") == "default"
):
config_data["text_config"]["rope_scaling"] = raw_original_config["rope_scaling"]
elif is_huggingface_model_id(original_model_path):
# Log that we couldn't find the original config
warnings.warn(
f"Could not fetch original config from HuggingFace Hub: {original_model_path}"
)
else:
# Only warn if the local path was expected to exist
original_config_file = Path(original_model_path) / "config.json"
if not original_config_file.exists():
warnings.warn(f"Original config file not found: {original_config_file}")
except Exception as e:
warnings.warn(f"Could not restore original rope_scaling configuration: {e}")

return config_data
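
A minimal sketch of the drift this function reverses, using illustrative values: the original Qwen2.5-VL-style config declares rope_scaling with type "mrope", while the config serialized after loading reports type "default" plus a rope_type field. The extra mrope_section values below are placeholders, not taken from a real checkpoint:

# Illustrative only: the exact fields transformers injects are an assumption;
# only the "mrope" -> "default" type change is taken from the docstring above.
original = {"rope_scaling": {"type": "mrope", "mrope_section": [16, 24, 24]}}

# What the serialized config may look like after the model has been loaded:
loaded = {
    "rope_scaling": {"type": "default", "rope_type": "default", "mrope_section": [16, 24, 24]}
}

# The same condition restore_original_rope_scaling checks before restoring:
needs_restore = (
    original["rope_scaling"].get("type") == "mrope"
    and loaded["rope_scaling"].get("type") == "default"
    and "rope_type" in loaded["rope_scaling"]
)
if needs_restore:
    loaded["rope_scaling"] = original["rope_scaling"]
    print("rope_scaling restored:", loaded["rope_scaling"])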

Comment on lines +234 to +302
Contributor:

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Restore logic misses text_config-only rope_scaling; unify fetch and avoid print.

If the original config has rope_scaling only under text_config (common in VLMs), the current logic won't restore it because it only checks raw_original_config["rope_scaling"]. Also, prefer fetch_model_config for both Hub and local paths, and avoid print() in library code.

Apply this diff to make restoration robust and logging consistent:

@@
-def restore_original_rope_scaling(config_data: dict, original_model_path: str) -> dict:
+def restore_original_rope_scaling(config_data: dict, original_model_path: str) -> dict:
@@
-    try:
-        raw_original_config = None
-
-        # Check if original_model_path is a HuggingFace Hub model ID or local path
-        if is_huggingface_model_id(original_model_path):
-            # Try to fetch config from HuggingFace Hub
-            raw_original_config = fetch_model_config(original_model_path)
-        else:
-            # Handle as local filesystem path
-            original_config_file = Path(original_model_path) / "config.json"
-            if original_config_file.exists():
-                with open(original_config_file) as f:
-                    raw_original_config = json.load(f)
+    try:
+        # Always use unified fetcher (handles both Hub and local)
+        raw_original_config = fetch_model_config(original_model_path)
@@
-        if raw_original_config is not None:
-            # Check if rope_scaling was modified from mrope to default
-            if (
-                "rope_scaling" in raw_original_config
-                and "rope_scaling" in config_data
-                and raw_original_config["rope_scaling"].get("type") == "mrope"
-                and config_data["rope_scaling"].get("type") == "default"
-                and "rope_type" in config_data["rope_scaling"]
-            ):
-                print(f"Restoring original rope_scaling configuration from {original_model_path}")
-                config_data["rope_scaling"] = raw_original_config["rope_scaling"]
-
-                # Also restore rope_scaling in text_config if it exists
-                if (
-                    "text_config" in config_data
-                    and "rope_scaling" in config_data["text_config"]
-                    and config_data["text_config"]["rope_scaling"].get("type") == "default"
-                ):
-                    config_data["text_config"]["rope_scaling"] = raw_original_config["rope_scaling"]
-        elif is_huggingface_model_id(original_model_path):
-            # Log that we couldn't find the original config
-            warnings.warn(
-                f"Could not fetch original config from HuggingFace Hub: {original_model_path}"
-            )
-        else:
-            # Only warn if the local path was expected to exist
-            original_config_file = Path(original_model_path) / "config.json"
-            if not original_config_file.exists():
-                warnings.warn(f"Original config file not found: {original_config_file}")
+        if raw_original_config is not None:
+            # Original rope may live at top-level or under text_config
+            orig_rope = (
+                raw_original_config.get("rope_scaling")
+                or raw_original_config.get("text_config", {}).get("rope_scaling")
+            )
+            if isinstance(orig_rope, dict) and orig_rope.get("type") == "mrope":
+                def _should_restore(cur: dict | None) -> bool:
+                    return isinstance(cur, dict) and cur.get("type") == "default"
+
+                restored = False
+                # Top-level
+                if _should_restore(config_data.get("rope_scaling")):
+                    config_data["rope_scaling"] = orig_rope
+                    restored = True
+                # Nested text_config
+                if "text_config" in config_data and _should_restore(
+                    config_data["text_config"].get("rope_scaling")
+                ):
+                    config_data["text_config"]["rope_scaling"] = orig_rope
+                    restored = True
+                if restored:
+                    warnings.warn(
+                        f"Restored original rope_scaling (mrope) from {original_model_path}"
+                    )
+        else:
+            warnings.warn(f"Could not load original config for: {original_model_path}")
@@
-    except Exception as e:
+    except Exception as e:
         warnings.warn(f"Could not restore original rope_scaling configuration: {e}")

🏁 Script executed:

#!/bin/bash
# 1) Ensure there is only one definition of restore_original_rope_scaling
rg -nP --type=py '^\s*def\s+restore_original_rope_scaling\s*\('

# 2) Grep for call sites to confirm both top-level and text_config cases may occur
rg -n 'text_config"|'\''text_config'\''|\brope_scaling\b' --type=json -g 'config.json' -S -C1 || true

Length of output: 284


I need to verify whether fetch_model_config() actually supports both Hub model IDs and local paths, and confirm the described logic gap.



🏁 Script executed:

#!/bin/bash
# 1) Find the definition of fetch_model_config
rg -nP --type=py '^\s*def\s+fetch_model_config\s*\(' -A 15

# 2) Check if current code uses print() anywhere
rg -n 'print\s*\(' modelopt/torch/export/model_config_utils.py

# 3) Check the imports at the top of the file to understand available helpers
head -50 modelopt/torch/export/model_config_utils.py

Length of output: 2873


🏁 Script executed:

#!/bin/bash
# 1) Get full implementation of fetch_model_config
rg -nP --type=py '^\s*def\s+fetch_model_config\s*\(' -A 50 modelopt/torch/utils/model_path_utils.py | head -70

# 2) Extract the exact restoration logic from restore_original_rope_scaling
sed -n '250,280p' modelopt/torch/export/model_config_utils.py

Length of output: 3560


Replace redundant fetch logic with unified call; fix logic gap to restore text_config-only rope_scaling; replace print with warning.

The review comment is correct. Verification confirms:

  1. Logic gap exists: Current code only checks "rope_scaling" in raw_original_config at the top level. If the original config has rope_scaling only nested under text_config (common in VLMs), the entire restoration block is skipped.

  2. Redundant branching: fetch_model_config() (imported at line 35) already handles both HuggingFace Hub IDs and local paths internally. Current code at lines 254–266 duplicates this by branching on is_huggingface_model_id() again.

  3. print() in library code: Line 278 uses print() instead of warnings.warn(), which is inappropriate for library code.

The suggested refactor correctly:

  • Unifies to always use fetch_model_config()
  • Checks both top-level and text_config nested rope_scaling
  • Replaces print() with warnings.warn()

Apply the provided diff to resolve all three issues.

🤖 Prompt for AI Agents
In modelopt/torch/export/model_config_utils.py around lines 234-302, replace the
redundant branching that separately handles Hub vs local paths and the print()
call: always call fetch_model_config(original_model_path) (it already handles
Hub and local), then if raw_original_config is not None check and restore
rope_scaling from either raw_original_config["rope_scaling"] or
raw_original_config.get("text_config", {}).get("rope_scaling") (apply to both
config_data top-level and config_data["text_config"] if present) when types
indicate original was "mrope" and current is "default"; replace the print(...)
with warnings.warn(...); if fetch_model_config returns None warn that original
config could not be retrieved.
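
For readability, the same proposal condensed into a standalone helper. This is a sketch of the reviewer's suggestion, not the merged code; it assumes, as the verification above states, that fetch_model_config accepts either a Hub model ID or a local path and returns the parsed config dict, or None on failure:

import warnings

from modelopt.torch.utils.model_path_utils import fetch_model_config


def restore_rope_scaling_sketch(config_data: dict, original_model_path: str) -> dict:
    """Condensed form of the reviewer's suggestion; not the merged implementation."""
    raw = fetch_model_config(original_model_path)  # handles Hub IDs and local paths
    if raw is None:
        warnings.warn(f"Could not load original config for: {original_model_path}")
        return config_data

    # The original rope_scaling may sit at the top level or under text_config (VLMs).
    orig_rope = raw.get("rope_scaling") or raw.get("text_config", {}).get("rope_scaling")
    if isinstance(orig_rope, dict) and orig_rope.get("type") == "mrope":
        for target in (config_data, config_data.get("text_config", {})):
            current = target.get("rope_scaling")
            if isinstance(current, dict) and current.get("type") == "default":
                target["rope_scaling"] = orig_rope
    return config_data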


def pad_weights(weights, tp_size):
"""Returns the padded weights to tp_size."""
assert len(weights.shape) > 1
6 changes: 6 additions & 0 deletions modelopt/torch/export/unified_export_hf.py
@@ -54,6 +54,7 @@
QUANTIZATION_W4A8_AWQ,
QUANTIZATION_W4A8_NVFP4_FP8,
)
from .model_config_utils import restore_original_rope_scaling
from .plugins import export_spec_ckpt_config, export_spec_ckpt_state_dict, spec_opt_only
from .quant_utils import (
fuse_prequant_layernorm,
@@ -541,6 +542,11 @@ def export_hf_checkpoint(
with open(original_config) as file:
config_data = json.load(file)

# Preserve original rope_scaling configuration if it was modified by transformers
original_model_path = getattr(model, "_original_model_path", None)
if original_model_path is not None:
config_data = restore_original_rope_scaling(config_data, original_model_path)

config_data["quantization_config"] = hf_quant_config

with open(original_config, "w") as file:
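The ordering in this hunk is what the earlier reply in the comment thread alludes to: the exporter cannot simply copy the original config.json because it must inject quantization_config, so rope_scaling is restored first and the file is rewritten afterwards. A minimal sketch of that flow; the paths and hf_quant_config contents are placeholders, not values produced by the exporter:

# Sketch of the export-time config rewrite, under the assumptions stated above.
import json

from modelopt.torch.export.model_config_utils import restore_original_rope_scaling

original_config = "/path/to/export_dir/config.json"
hf_quant_config = {"quant_algo": "FP8"}  # placeholder, not a real generated config

with open(original_config) as file:
    config_data = json.load(file)

# Undo the transformers-side rope_scaling rewrite before adding new keys.
config_data = restore_original_rope_scaling(config_data, "/path/to/original/model")

# The original file cannot simply be copied back: quantization metadata is added here.
config_data["quantization_config"] = hf_quant_config

with open(original_config, "w") as file:
    json.dump(config_data, file, indent=2)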
1 change: 1 addition & 0 deletions modelopt/torch/utils/__init__.py
@@ -22,6 +22,7 @@
from .import_utils import *
from .list import *
from .logging import *
from .model_path_utils import *
from .network import *
from .perf import *
from .regex import *