
Commit 45b288f

Address security concerns in code
Signed-off-by: Keval Morabia <[email protected]>
1 parent d0b0c0f commit 45b288f

File tree: 15 files changed, +141 −42 lines changed

docs/source/guides/2_save_load.rst
Lines changed: 1 addition & 0 deletions

@@ -129,6 +129,7 @@ Here is the example workflow of restoring the ModelOpt-modified model architecture
     model = ...

     # Restore the model architecture using the saved `modelopt_state`
+    # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
     modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
     model = mto.restore_from_modelopt_state(model, modelopt_state)
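The same pattern recurs throughout this commit: `weights_only=False` is tolerable only because the file being loaded was written by ModelOpt itself. For context, here is a minimal sketch of the guide's full round trip, assuming the `mto.modelopt_state()` API documented in this guide:

```python
import torch

import modelopt.torch.opt as mto

# Save side: persist the ModelOpt state produced for the modified model
model = ...  # a ModelOpt-modified model
torch.save(mto.modelopt_state(model), "modelopt_state.pth")

# Load side: this file was just written locally by ModelOpt, which is why
# weights_only=False is acceptable; never load an untrusted file this way
modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
model = mto.restore_from_modelopt_state(model, modelopt_state)
```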

examples/llm_qat/export.py
Lines changed: 1 addition & 0 deletions

@@ -51,6 +51,7 @@ def get_model(

     # Restore modelopt state for LoRA models. For QAT/QAD models from_pretrained call handles this
     if hasattr(model, "peft_config"):
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         modelopt_state = torch.load(f"{ckpt_path}/modelopt_state_train.pth", weights_only=False)
         restore_from_modelopt_state(model, modelopt_state)
         print_rank_0("Restored modelopt state")

examples/llm_sparsity/README.md
Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ python data_prep.py --save_path data

 The following command demonstrates how to perform SAT on the Llama2-7B model on 8 GPUs.
 The model is finetuned on the [cnn_dailymail](https://huggingface.co/datasets/abisee/cnn_dailymail) dataset for 3 epochs.
-The input data is tokenized to a maximum length of 1024 tokens. The tokenized data is saved as a pickle file for faster data loading. The one-time process takes less than an hour to finish depending on the CPU. The resulting pickle file can be utilized for future training sessions.
+The input data is tokenized to a maximum length of 1024 tokens.

 ```sh
 bash launch_finetune.sh --model meta-llama/Llama-2-7b-hf \

examples/llm_sparsity/finetune.py
Lines changed: 10 additions & 21 deletions

@@ -32,7 +32,6 @@
 import argparse
 import copy
 import os
-import pickle
 from collections.abc import Sequence
 from dataclasses import dataclass, field

@@ -232,27 +231,17 @@ def __init__(
     ):
         super().__init__()

-        pickle_name = f"dict_{split}_{tokenizer.model_max_length}.pickle"
         with training_args.main_process_first():
-            if os.path.isfile(pickle_name):
-                with open(pickle_name, "rb") as f:
-                    print_rank_0("Reuse pickled data")
-                    data_dict = pickle.load(f)
-            else:
-                print_rank_0("Loading data...")
-                list_data_dict = utils.jload(data_path)
-
-                print_rank_0("Formatting inputs...")
-                prompt_input = PROMPT_DICT["prompt_input"]
-                sources = [prompt_input.format_map(example) for example in list_data_dict]
-                targets = [
-                    f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict
-                ]
-
-                print_rank_0("Tokenizing inputs... This may take some time...")
-                data_dict = preprocess(sources, targets, tokenizer)
-                with open(pickle_name, "wb") as f:
-                    pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
+            print_rank_0("Loading data...")
+            list_data_dict = utils.jload(data_path)
+
+            print_rank_0("Formatting inputs...")
+            prompt_input = PROMPT_DICT["prompt_input"]
+            sources = [prompt_input.format_map(example) for example in list_data_dict]
+            targets = [f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict]
+
+            print_rank_0("Tokenizing inputs... This may take some time...")
+            data_dict = preprocess(sources, targets, tokenizer)

         self.input_ids = data_dict["input_ids"]
         self.labels = data_dict["labels"]

modelopt/onnx/quantization/__main__.py
Lines changed: 22 additions & 4 deletions

@@ -52,6 +52,11 @@ def get_parser() -> argparse.ArgumentParser:
         type=str,
         help="Calibration data in npz/npy format. If None, random data for calibration will be used.",
     )
+    group.add_argument(
+        "--trust_calibration_data",
+        action="store_true",
+        help="If True, trust the calibration data and allow pickle deserialization.",
+    )
     group.add_argument(
         "--calibration_cache_path",
         type=str,

@@ -263,10 +268,23 @@ def main():
     args = get_parser().parse_args()
     calibration_data = None
     if args.calibration_data_path:
-        calibration_data = np.load(args.calibration_data_path, allow_pickle=True)
-        if args.calibration_data_path.endswith(".npz"):
-            # Convert the NpzFile object to a Python dictionary
-            calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        # Security: Disable pickle deserialization for untrusted sources to prevent RCE attacks
+        try:
+            calibration_data = np.load(
+                args.calibration_data_path, allow_pickle=args.trust_calibration_data
+            )
+            if args.calibration_data_path.endswith(".npz"):
+                # Convert the NpzFile object to a Python dictionary
+                calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        except ValueError as e:
+            if "allow_pickle" in str(e) and not args.trust_calibration_data:
+                raise ValueError(
+                    "Calibration data file contains pickled objects which pose a security risk. "
+                    "For trusted sources, you may enable pickle deserialization by setting the "
+                    "--trust_calibration_data flag."
+                ) from e
+            else:
+                raise

     quantize(
         args.onnx_path,
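The guard works because NumPy refuses to deserialize object arrays unless `allow_pickle=True`, and the `"allow_pickle" in str(e)` check matches NumPy's error message. A minimal sketch of that mechanism, with a hypothetical `calib.npz`:

```python
import numpy as np

# An object array can only be stored inside an .npz via pickle.
unsafe = np.array([{"input_ids": [1, 2, 3]}], dtype=object)
np.savez("calib.npz", data=unsafe)

try:
    # Without --trust_calibration_data the CLI loads with allow_pickle=False,
    # so accessing the pickled array raises a ValueError.
    np.load("calib.npz", allow_pickle=False)["data"]
except ValueError as e:
    # NumPy's message, "Object arrays cannot be loaded when allow_pickle=False",
    # contains the substring that main() checks for.
    assert "allow_pickle" in str(e)
```

Users with trusted calibration files can opt back in by passing `--trust_calibration_data` on the command line.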

modelopt/torch/export/distribute.py
Lines changed: 1 addition & 0 deletions

@@ -91,6 +91,7 @@ def read_configs_and_weights_from_rank(
             raise ValueError("NFSWorkspace is not initialized!")
         state_path = self._get_state_path(target_rank)
         if state_path.exists():
+            # Security NOTE: weights_only=False is used here on ModelOpt-generated ckpt, not on untrusted user input
             state = torch.load(state_path, map_location="cpu", weights_only=False)
             return state["config"], state["weight"]
         else:

modelopt/torch/opt/conversion.py
Lines changed: 1 addition & 0 deletions

@@ -526,6 +526,7 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
         model = ...  # Create the model-like object

         # Restore the previously saved modelopt state followed by model weights
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         mto.restore_from_modelopt_state(
             model, torch.load("modelopt_state.pt", weights_only=False)
         )  # Restore modelopt state

modelopt/torch/opt/plugins/huggingface.py
Lines changed: 1 addition & 0 deletions

@@ -79,6 +79,7 @@ def new_init_fn(self, *args, **kwargs):
         modelopt_state_path = _get_modelopt_state_path(model_path)
         _original__init__(self, *args, **kwargs)
         if os.path.isfile(modelopt_state_path):
+            # Security NOTE: weights_only=False is used on ModelOpt-generated state_dict, not on untrusted user input
             modelopt_state = torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
             with extra_context() if extra_context else nullcontext():
                 restore_from_modelopt_state(self, modelopt_state)

modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
Lines changed: 1 addition & 0 deletions

@@ -242,6 +242,7 @@ def restore_sharded_modelopt_state(
         return

     # Loading the common modelopt_state (replicated on all ranks)
+    # Security NOTE: weights_only=False is used here on NVIDIA-generated file, not on untrusted user input
     common_modelopt_state = torch.load(
         modelopt_checkpoint_name + "/" + COMMON_STATE_FNAME, weights_only=False
     )

modelopt/torch/opt/plugins/megatron.py
Lines changed: 74 additions & 13 deletions

@@ -15,9 +15,10 @@
 """Support quantization and save/resore for Megatron."""

 import contextlib
-import pickle  # nosec
+import io
 import types
 from typing import Any
+from warnings import warn

 import megatron.core.transformer.mlp as megatron_mlp
 import regex as re

@@ -26,6 +27,74 @@
 from ..dynamic import DynamicModule


+def _convert_dtypes_to_strings(obj: Any) -> Any:
+    """Convert torch.dtype to strings for JSON-safe serialization."""
+    if isinstance(obj, torch.dtype):
+        return {"__dtype__": str(obj)}
+    elif isinstance(obj, dict):
+        return {k: _convert_dtypes_to_strings(v) for k, v in obj.items()}
+    elif isinstance(obj, (list, tuple)):
+        converted = [_convert_dtypes_to_strings(item) for item in obj]
+        return {"__tuple__": converted} if isinstance(obj, tuple) else converted
+    return obj
+
+
+def _restore_dtypes_from_strings(obj: Any) -> Any:
+    """Restore torch.dtype from string representations."""
+    if isinstance(obj, dict):
+        if "__dtype__" in obj:
+            dtype_str = obj["__dtype__"].split(".")[-1]
+            return getattr(torch, dtype_str)
+        elif "__tuple__" in obj:
+            return tuple(_restore_dtypes_from_strings(item) for item in obj["__tuple__"])
+        return {k: _restore_dtypes_from_strings(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [_restore_dtypes_from_strings(item) for item in obj]
+    return obj
+
+
+def safe_serialize_state(extra_state: dict) -> torch.Tensor:
+    """Serialize extra_state safely without pickle.
+
+    Uses torch.save with weights_only=True for security.
+    Raises TypeError if extra_state contains unsafe types.
+    """
+    # Convert dtypes to strings for safe serialization
+    safe_state = _convert_dtypes_to_strings(extra_state)
+
+    # Serialize using PyTorch with new zipfile format
+    buffer = io.BytesIO()
+    torch.save(safe_state, buffer, _use_new_zipfile_serialization=True)
+
+    return torch.frombuffer(bytearray(buffer.getvalue()), dtype=torch.uint8)
+
+
+def safe_deserialize_state(state: torch.Tensor) -> dict:
+    """Deserialize extra_state safely without pickle.
+
+    Attempts new safe format first, falls back to pickle with warning for
+    backward compatibility with old checkpoints.
+    """
+    buffer = state.detach().cpu().numpy().tobytes()
+
+    try:
+        # Try new safe format (weights_only=True)
+        extra_state = torch.load(io.BytesIO(buffer), weights_only=True)
+        return _restore_dtypes_from_strings(extra_state)
+
+    except Exception:
+        # Fall back to pickle for old checkpoints
+        warn(
+            "Loading checkpoint in legacy pickle format. This poses a security risk (RCE). "
+            "Please re-save your checkpoint to use the new safe format.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        import pickle  # nosec - backward compatibility only
+
+        return pickle.loads(buffer)  # nosec
+
+
 def _modelopt_get_extra_state(self):
     """Populating the extra_state when state_dict() is called.

@@ -34,8 +103,7 @@ def _modelopt_get_extra_state(self):
     get_extra_state callbacks

     If there is no extra_state, None is returned. Otherwise, the dictionary
-    is serialized (via pickle) into a byte tensor following
-    TransformerEngine's approach. In this case, the extra_state,
+    is safely serialized into a byte tensor.
     """
     try:
         extra_state = super().get_extra_state()  # type: ignore[misc]

@@ -54,12 +122,9 @@ def _modelopt_get_extra_state(self):
     if len(extra_state) == 0:
         return None

-    # Serialize state into byte tensor
+    # Serialize state safely without pickle
     torch.cuda.synchronize()
-    state_serialized = bytearray(pickle.dumps(extra_state))  # nosec
-    state_serialized = torch.frombuffer(state_serialized, dtype=torch.uint8)
-
-    return state_serialized
+    return safe_serialize_state(extra_state)


 def _modelopt_set_extra_state(self, state: Any):

@@ -73,11 +138,7 @@ def _modelopt_set_extra_state(self, state: Any):
         return

     if isinstance(state, torch.Tensor):
-        # Default format: byte tensor with pickled data
-        #
-        # TODO: possible deserialization improvement
-        # https://github.com/NVIDIA/TensorRT-LLM/commits/main/tensorrt_llm/serialization.py
-        extra_state = pickle.loads(state.detach().cpu().numpy().tobytes())  # nosec
+        extra_state = safe_deserialize_state(state)
     else:
         raise RuntimeError("Unsupported extra_state format.")