Commit 1c03c81

Address security concerns in code
Signed-off-by: Keval Morabia <[email protected]>
1 parent d0b0c0f commit 1c03c81

File tree

15 files changed: +67 / -330 lines changed

docs/source/guides/2_save_load.rst

Lines changed: 1 addition & 0 deletions
```diff
@@ -129,6 +129,7 @@ Here is the example workflow of restoring the ModelOpt-modified model architectu
 model = ...

 # Restore the model architecture using the saved `modelopt_state`
+# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
 modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
 model = mto.restore_from_modelopt_state(model, modelopt_state)
```
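For context, the workflow this note annotates looks roughly as follows. This is a minimal sketch, assuming `mto` is `modelopt.torch.opt` and that `modelopt_state.pth` was produced by the same pipeline; the file names follow the docs above:

```python
import torch
import modelopt.torch.opt as mto

# After quantizing/sparsifying with ModelOpt, persist the ModelOpt state
# and the weights separately (sketch: `model` is a ModelOpt-modified module).
torch.save(mto.modelopt_state(model), "modelopt_state.pth")
torch.save(model.state_dict(), "model_weights.pth")

# Later: re-create the original architecture, then restore the ModelOpt modifications.
model = ...  # rebuild the unmodified base model here

# Security NOTE: weights_only=False is acceptable because this file was
# generated by ModelOpt above, not received from an untrusted source.
modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
model = mto.restore_from_modelopt_state(model, modelopt_state)
model.load_state_dict(torch.load("model_weights.pth", weights_only=True))
```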

examples/llm_qat/export.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -51,6 +51,7 @@ def get_model(

     # Restore modelopt state for LoRA models. For QAT/QAD models from_pretrained call handles this
     if hasattr(model, "peft_config"):
+        # Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
         modelopt_state = torch.load(f"{ckpt_path}/modelopt_state_train.pth", weights_only=False)
         restore_from_modelopt_state(model, modelopt_state)
         print_rank_0("Restored modelopt state")
```
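Why `weights_only=False` is needed at all: a ModelOpt state dict embeds custom Python objects, and PyTorch's weights-only unpickler only admits tensors and a small allowlist of builtin types. A self-contained illustration (the `Config` class is hypothetical, standing in for ModelOpt's internal state objects):

```python
import torch

class Config:
    # Hypothetical stand-in for the custom objects a ModelOpt state dict contains.
    def __init__(self, bits: int = 4):
        self.bits = bits

torch.save({"quant_cfg": Config()}, "/tmp/state_with_objects.pth")

try:
    # The weights-only unpickler rejects non-allowlisted classes such as Config.
    torch.load("/tmp/state_with_objects.pth", weights_only=True)
except Exception as err:  # pickle.UnpicklingError on recent PyTorch
    print(f"weights_only=True rejected the file: {err}")

# The same self-generated (trusted) file loads fine with weights_only=False.
state = torch.load("/tmp/state_with_objects.pth", weights_only=False)
print(state["quant_cfg"].bits)  # -> 4
```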

examples/llm_sparsity/README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -84,7 +84,7 @@ python data_prep.py --save_path data

 The following command demonstrates how to perform SAT on the Llama2-7B model on 8 GPUs.
 The model is finetuned on the [cnn_dailymail](https://huggingface.co/datasets/abisee/cnn_dailymail) dataset for 3 epochs.
-The input data is tokenized to a maximum length of 1024 tokens. The tokenized data is saved as a pickle file for faster data loading. The one-time process takes less than an hour to finish depending on the CPU. The resulting pickle file can be utilized for future training sessions.
+The input data is tokenized to a maximum length of 1024 tokens.

 ```sh
 bash launch_finetune.sh --model meta-llama/Llama-2-7b-hf \
````

examples/llm_sparsity/finetune.py

Lines changed: 10 additions & 21 deletions
```diff
@@ -32,7 +32,6 @@
 import argparse
 import copy
 import os
-import pickle
 from collections.abc import Sequence
 from dataclasses import dataclass, field

@@ -232,27 +231,17 @@ def __init__(
     ):
         super().__init__()

-        pickle_name = f"dict_{split}_{tokenizer.model_max_length}.pickle"
         with training_args.main_process_first():
-            if os.path.isfile(pickle_name):
-                with open(pickle_name, "rb") as f:
-                    print_rank_0("Reuse pickled data")
-                    data_dict = pickle.load(f)
-            else:
-                print_rank_0("Loading data...")
-                list_data_dict = utils.jload(data_path)
-
-                print_rank_0("Formatting inputs...")
-                prompt_input = PROMPT_DICT["prompt_input"]
-                sources = [prompt_input.format_map(example) for example in list_data_dict]
-                targets = [
-                    f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict
-                ]
-
-                print_rank_0("Tokenizing inputs... This may take some time...")
-                data_dict = preprocess(sources, targets, tokenizer)
-                with open(pickle_name, "wb") as f:
-                    pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
+            print_rank_0("Loading data...")
+            list_data_dict = utils.jload(data_path)
+
+            print_rank_0("Formatting inputs...")
+            prompt_input = PROMPT_DICT["prompt_input"]
+            sources = [prompt_input.format_map(example) for example in list_data_dict]
+            targets = [f"{example['output']}{tokenizer.eos_token}" for example in list_data_dict]
+
+            print_rank_0("Tokenizing inputs... This may take some time...")
+            data_dict = preprocess(sources, targets, tokenizer)

         self.input_ids = data_dict["input_ids"]
         self.labels = data_dict["labels"]
```
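The deleted caching path is the textbook unsafe-deserialization pattern: `pickle.load` executes whatever a crafted file instructs it to, so a shared or tampered cache file means code execution. A minimal demonstration of the mechanism (the `Evil` class is illustrative, not from this repo):

```python
import pickle

class Evil:
    # pickle calls the callable returned by __reduce__ when *loading*;
    # print stands in here for os.system or anything else an attacker picks.
    def __reduce__(self):
        return (print, ("arbitrary code ran during unpickling!",))

payload = pickle.dumps(Evil())
pickle.loads(payload)  # prints the message; a crafted cache file could run anything
```

Dropping the cache and re-tokenizing via `utils.jload` plus `preprocess` on every run trades some startup time for removing that attack surface entirely.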

modelopt/onnx/quantization/__main__.py

Lines changed: 22 additions & 4 deletions
```diff
@@ -52,6 +52,11 @@ def get_parser() -> argparse.ArgumentParser:
         type=str,
         help="Calibration data in npz/npy format. If None, random data for calibration will be used.",
     )
+    group.add_argument(
+        "--trust_calibration_data",
+        action="store_true",
+        help="If True, trust the calibration data and allow pickle deserialization.",
+    )
     group.add_argument(
         "--calibration_cache_path",
         type=str,

@@ -263,10 +268,23 @@ def main():
     args = get_parser().parse_args()
     calibration_data = None
     if args.calibration_data_path:
-        calibration_data = np.load(args.calibration_data_path, allow_pickle=True)
-        if args.calibration_data_path.endswith(".npz"):
-            # Convert the NpzFile object to a Python dictionary
-            calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        # Security: Disable pickle deserialization for untrusted sources to prevent RCE attacks
+        try:
+            calibration_data = np.load(
+                args.calibration_data_path, allow_pickle=args.trust_calibration_data
+            )
+            if args.calibration_data_path.endswith(".npz"):
+                # Convert the NpzFile object to a Python dictionary
+                calibration_data = {key: calibration_data[key] for key in calibration_data.files}
+        except ValueError as e:
+            if "allow_pickle" in str(e) and not args.trust_calibration_data:
+                raise ValueError(
+                    "Calibration data file contains pickled objects which pose a security risk. "
+                    "For trusted sources, you may enable pickle deserialization by setting the "
+                    "--trust_calibration_data flag."
+                ) from e
+            else:
+                raise

     quantize(
         args.onnx_path,
```
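The new flag maps directly onto NumPy's behavior: `np.load(..., allow_pickle=False)` loads plain numeric arrays fine but raises `ValueError` for object arrays, which can only be deserialized through pickle. A small sketch of both cases (file names hypothetical):

```python
import numpy as np

# Plain numeric data never needs pickle and still loads with allow_pickle=False.
np.savez("calib_safe.npz", input_ids=np.zeros((8, 128), dtype=np.int64))
safe = np.load("calib_safe.npz", allow_pickle=False)  # works

# Object-dtype arrays are serialized via pickle, so loading them is now opt-in.
np.save("calib_pickled.npy", np.array([{"batch": 0}], dtype=object))
try:
    np.load("calib_pickled.npy", allow_pickle=False)
except ValueError as err:
    print(err)  # "Object arrays cannot be loaded when allow_pickle=False"
```

For a trusted pickled file, the opt-in would look like `python -m modelopt.onnx.quantization --onnx_path model.onnx --calibration_data_path calib_pickled.npy --trust_calibration_data`, assuming `--onnx_path` matches the parser above; `model.onnx` is a placeholder.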
