
Commit 7e16b15

kevalmorabia97 authored and yeyu-nvidia committed
0.23.1 Release - fix for torch 2.6
1 parent 0dfb7c1 commit 7e16b15
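
For context on the fix: PyTorch 2.6 flipped the default of torch.load's weights_only argument from False to True, which restricts unpickling to tensors and a small allow-list of builtin types. Checkpoints carrying arbitrary Python objects, such as the modelopt state dicts touched throughout this commit, then fail to load unless weights_only=False is passed explicitly. Below is a minimal sketch of the failure mode; the TrainingMeta class and the state.pt path are hypothetical stand-ins, not part of this commit.

import torch

class TrainingMeta:
    """Hypothetical class standing in for the non-tensor objects inside a modelopt state."""

    def __init__(self, mode: str):
        self.mode = mode

torch.save({"meta": TrainingMeta("quantize")}, "state.pt")

# On torch >= 2.6 the default weights_only=True rejects custom classes:
# torch.load("state.pt")  # raises pickle.UnpicklingError on torch >= 2.6

# Opting out restores the pre-2.6 behavior; only safe for trusted checkpoints.
state = torch.load("state.pt", weights_only=False)
print(state["meta"].mode)  # quantize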

19 files changed (+36, -23 lines)

.vscode/settings.json

Lines changed: 4 additions & 1 deletion
@@ -3,7 +3,7 @@
   "editor.rulers": [
     100,
     120
-  ], // 100 for black auto-formatter, 120 for hard limit in ruff
+  ], // 100 for ruff auto-formatter, 120 for hard limit in ruff
   "[python]": {
     "editor.defaultFormatter": "charliermarsh.ruff",
     "editor.formatOnSave": true,
@@ -40,4 +40,7 @@
     "--no-cov",
   ],
   "evenBetterToml.schema.enabled": false, // disable toml/json schema since we have custom fields
+  "python.analysis.extraPaths": [
+    "./tests/" // add tests to python path just like pytest does in pyproject.toml
+  ]
 }

examples/llm_distill/README.md

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ In this tutorial, we demonstrate how to use Model Optimizer to perform teacher-s
 
 Checkout the stand-along distillation example in the [NVIDIA NeMo repository](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp/language_modeling/megatron_gpt_distillation.py).
 
-You can also look at the tutorial notebooks [here](https://github.com/NVIDIA/NeMo/tree/main/tutorials/llm/llama-3/pruning-distillation) which showcase the usage of Minitron pruning followed by distillation for Llama 3.1 8B step-by-step in NeMo framework.
+You can also look at the tutorial notebooks [here](https://github.com/NVIDIA/NeMo/tree/main/tutorials/llm/llama/pruning-distillation) which showcase the usage of Minitron pruning followed by distillation for Llama 3.1 8B step-by-step in NeMo framework.
 
 ## Knowledge Distillation (KD) for HuggingFace Models
 

examples/llm_distill/main.py

Lines changed: 1 addition & 1 deletion
@@ -197,7 +197,7 @@ def train():
         if not os.path.isfile(modelopt_state_path):
             raise FileNotFoundError("`modelopt_state.pt` not found with checkpoint.")
         logger.info(f"Loading modelopt state from {modelopt_state_path}")
-        modelopt_state = torch.load(modelopt_state_path)
+        modelopt_state = torch.load(modelopt_state_path, weights_only=False)
         mto.restore_from_modelopt_state(model, modelopt_state)
 
     logger.info("Beginning training...")

examples/llm_qat/README.md

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ torch.save(mto.modelopt_state(model), "modelopt_quantizer_states.pt")
 
 # To resume training from a checkpoint or load the final QAT model for evaluation,
 # load the quantizer states before loading the model weights
-# mto.restore_from_modelopt_state(model, torch.load("modelopt_quantizer_states.pt"))
+# mto.restore_from_modelopt_state(model, torch.load("modelopt_quantizer_states.pt", weights_only=False))
 # After loading the quantizer states, load the model weights
 # model.load_state_dict(state_dict_from_last_checkpoint)
 
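
Combined with the save call shown in the hunk context above, the resume flow this README describes looks roughly like the sketch below; `model` is assumed to be an already-quantized model and `state_dict_from_last_checkpoint` to come from the surrounding training script.

import modelopt.torch.opt as mto
import torch

# Save the quantizer states once after quantization (from the hunk context):
torch.save(mto.modelopt_state(model), "modelopt_quantizer_states.pt")

# To resume training or evaluate: restore quantizer states first. On torch >= 2.6,
# weights_only=False is required because the state contains non-tensor objects.
mto.restore_from_modelopt_state(
    model, torch.load("modelopt_quantizer_states.pt", weights_only=False)
)

# Only after the quantizer states are restored, load the model weights.
model.load_state_dict(state_dict_from_last_checkpoint)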

examples/llm_qat/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
 flash-attn
 sentencepiece>=0.2.0
 tensorboardX
-torch>=2.1.0

examples/pruning/README.md

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ Model Optimizer can be used in one of the following complementary pruning modes
 
 ## Documentation
 
-Checkout the [Quick Start: Pruning](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/4_pruning.html) and the detailed [Optimization Guide](https://nvidia.github.io/TensorRT-Model-Optimizer/guides/2_pruning.html) in the Model Optimizer documentation for more information on how to use the above pruning algorithms in Model Optimizer.
+Checkout the [Quick Start: Pruning](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/5_pruning.html) and the detailed [Optimization Guide](https://nvidia.github.io/TensorRT-Model-Optimizer/guides/2_pruning.html) in the Model Optimizer documentation for more information on how to use the above pruning algorithms in Model Optimizer.
 
 ## Algorithms
 

examples/speculative_decoding/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -3,4 +3,3 @@ openai
 py7zr
 sentencepiece>=0.2.0
 tensorboardX
-torch>=2.1.0

modelopt/torch/export/distribute.py

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ def read_configs_and_weights_from_rank(
             raise ValueError("NFSWorkspace is not initialized!")
         state_path = self._get_state_path(target_rank)
         if state_path.exists():
-            state = torch.load(state_path, map_location="cpu")
+            state = torch.load(state_path, map_location="cpu", weights_only=False)
             return state["config"], state["weight"]
         else:
             return None, None

modelopt/torch/export/layer_utils.py

Lines changed: 2 additions & 2 deletions
@@ -822,8 +822,8 @@ def _split_gate_from_fc(decoder_type, module, fc_name, fc_layer):
         # for Int8 SQ case, we split the weight scaling factor into two parts.
         weight_scaling_factors = torch.chunk(weight_scaling_factor, 2, dim=0)
 
-    config.fc = build_linear_config(fc_linear)
-    config.gate = build_linear_config(fc_linear)
+    config.fc = build_linear_config(fc_linear, LINEAR_COLUMN)
+    config.gate = build_linear_config(fc_linear, LINEAR_COLUMN)
     config.fc.weight = weights[0]
     config.gate.weight = weights[1]
     if weight_scaling_factors is not None:

modelopt/torch/opt/conversion.py

Lines changed: 2 additions & 1 deletion
@@ -488,7 +488,7 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
 
         # Restore the previously saved modelopt state followed by model weights
         mto.restore_from_modelopt_state(
-            model, torch.load("modelopt_state.pt")
+            model, torch.load("modelopt_state.pt", weights_only=False)
         ) # Restore modelopt state
         model.load_state_dict(torch.load("model_weights.pt"), ...) # Load the model weights
 
@@ -561,6 +561,7 @@ def restore(model: ModelLike, f: Union[str, os.PathLike, BinaryIO], **kwargs) ->
 
     # load checkpoint
    kwargs.setdefault("map_location", "cpu")
+    kwargs.setdefault("weights_only", False)
     objs = torch.load(f, **kwargs)
 
     # restore model architecture
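
The second hunk uses kwargs.setdefault so that restore keeps loading checkpoints the way older torch versions did by default, while callers can still override either option. A self-contained sketch of that pattern follows, assuming a hypothetical load_checkpoint helper rather than the library's actual function.

import os
from typing import Any, BinaryIO, Union

import torch

def load_checkpoint(f: Union[str, os.PathLike, BinaryIO], **kwargs) -> Any:
    # Mirror the defaults set in modelopt.torch.opt.conversion.restore: load to
    # CPU and allow full unpickling, unless the caller overrides either option.
    kwargs.setdefault("map_location", "cpu")
    kwargs.setdefault("weights_only", False)
    return torch.load(f, **kwargs)

# Callers can still opt back into the stricter torch >= 2.6 default:
# objs = load_checkpoint("checkpoint.pt", weights_only=True)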
