diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py
index 9cc07a1d956..c8d94789d5d 100644
--- a/advanced_source/dynamic_quantization_tutorial.py
+++ b/advanced_source/dynamic_quantization_tutorial.py
@@ -151,7 +151,8 @@ def tokenize(self, path):
 model.load_state_dict(
     torch.load(
         model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
         )
     )
 
diff --git a/advanced_source/static_quantization_tutorial.rst b/advanced_source/static_quantization_tutorial.rst
index 3b818aa03aa..efb171c0dfe 100644
--- a/advanced_source/static_quantization_tutorial.rst
+++ b/advanced_source/static_quantization_tutorial.rst
@@ -286,7 +286,7 @@ We next define several helper functions to help with model evaluation. These mos
     def load_model(model_file):
         model = MobileNetV2()
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         model.to('cpu')
         return model
diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py
index 07a1be517d1..df7628081ba 100644
--- a/beginner_source/basics/quickstart_tutorial.py
+++ b/beginner_source/basics/quickstart_tutorial.py
@@ -216,7 +216,7 @@ def test(dataloader, model, loss_fn):
 # the state dictionary into it.
 
 model = NeuralNetwork().to(device)
-model.load_state_dict(torch.load("model.pth"))
+model.load_state_dict(torch.load("model.pth", weights_only=True))
 
 #############################################################
 # This model can now be used to make predictions.
diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py
index 16a9f037417..5b3aef124b0 100644
--- a/beginner_source/basics/saveloadrun_tutorial.py
+++ b/beginner_source/basics/saveloadrun_tutorial.py
@@ -32,9 +32,14 @@
 ##########################
 # To load model weights, you need to create an instance of the same model first, and then load the parameters
 # using ``load_state_dict()`` method.
+#
+# In the code below, we set ``weights_only=True`` to limit the
+# functions executed during unpickling to only those necessary for
+# loading weights. Using ``weights_only=True`` is considered
+# a best practice when loading weights.
 
 model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
-model.load_state_dict(torch.load('model_weights.pth'))
+model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
 model.eval()
 
 ###########################
@@ -50,9 +55,14 @@
 torch.save(model, 'model.pth')
 
 ########################
-# We can then load the model like this:
+# We can then load the model as demonstrated below.
+#
+# As described in `Saving and loading torch.nn.Modules <https://pytorch.org/docs/stable/notes/serialization.html#saving-and-loading-torch-nn-modules>`__,
+# saving ``state_dict``s is considered the best practice. However,
+# below we use ``weights_only=False`` because this involves loading the
+# model, which is a legacy use case for ``torch.save``.
 
-model = torch.load('model.pth')
+model = torch.load('model.pth', weights_only=False)
 
 ########################
 # .. note:: This approach uses Python `pickle <https://docs.python.org/3/library/pickle.html>`_ module when serializing the model, thus it relies on the actual class definition to be available when loading the model.
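The two loading styles that the new ``saveloadrun_tutorial.py`` comments describe are worth seeing side by side. The following is an illustrative sketch only, not part of the patch: the file names are hypothetical and torchvision is assumed to be installed.

# Sketch (not part of the patch): the two loading styles described above.
# File names are hypothetical; torchvision is assumed to be installed.
import torch
import torchvision.models as models

# Best practice: persist only the state_dict and load it with
# weights_only=True, which restricts unpickling to tensor data.
model = models.vgg16()
torch.save(model.state_dict(), 'model_weights.pth')

model = models.vgg16()  # recreate the architecture first
model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
model.eval()

# Legacy: pickle the entire module. Loading must run the full
# unpickler (weights_only=False), and the class definition (here
# supplied by torchvision) must be importable at load time.
torch.save(model, 'model.pth')
model = torch.load('model.pth', weights_only=False)
model.eval()

Note that ``weights_only=True`` only fits the state_dict path; the whole-module path has to run the full unpickler, which is exactly why the hunk above passes ``weights_only=False`` there.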
diff --git a/beginner_source/blitz/cifar10_tutorial.py b/beginner_source/blitz/cifar10_tutorial.py
index 8e3f3252921..f38abdd5666 100644
--- a/beginner_source/blitz/cifar10_tutorial.py
+++ b/beginner_source/blitz/cifar10_tutorial.py
@@ -221,7 +221,7 @@ def forward(self, x):
 # wasn't necessary here, we only did it to illustrate how to do so):
 
 net = Net()
-net.load_state_dict(torch.load(PATH))
+net.load_state_dict(torch.load(PATH, weights_only=True))
 
 ########################################################################
 # Okay, now let us see what the neural network thinks these examples above are:
diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py
index 007ad3fd956..9bdf52d84b4 100644
--- a/beginner_source/fgsm_tutorial.py
+++ b/beginner_source/fgsm_tutorial.py
@@ -192,7 +192,7 @@ def forward(self, x):
 model = Net().to(device)
 
 # Load the pretrained model
-model.load_state_dict(torch.load(pretrained_model, map_location=device))
+model.load_state_dict(torch.load(pretrained_model, map_location=device, weights_only=True))
 
 # Set the model in evaluation mode. In this case this is for the Dropout layers
 model.eval()
diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py
index fcd33be2537..6c9b6b1fd77 100644
--- a/beginner_source/saving_loading_models.py
+++ b/beginner_source/saving_loading_models.py
@@ -153,7 +153,7 @@
 # .. code:: python
 #
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.eval()
 #
 # .. note::
@@ -206,7 +206,7 @@
 # .. code:: python
 #
 #    # Model class must be defined somewhere
-#    model = torch.load(PATH)
+#    model = torch.load(PATH, weights_only=False)
 #    model.eval()
 #
 # This save/load process uses the most intuitive syntax and involves the
@@ -290,7 +290,7 @@
 #    model = TheModelClass(*args, **kwargs)
 #    optimizer = TheOptimizerClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    model.load_state_dict(checkpoint['model_state_dict'])
 #    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 #    epoch = checkpoint['epoch']
@@ -354,7 +354,7 @@
 #    optimizerA = TheOptimizerAClass(*args, **kwargs)
 #    optimizerB = TheOptimizerBClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    modelA.load_state_dict(checkpoint['modelA_state_dict'])
 #    modelB.load_state_dict(checkpoint['modelB_state_dict'])
 #    optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -407,7 +407,7 @@
 # .. code:: python
 #
 #    modelB = TheModelBClass(*args, **kwargs)
-#    modelB.load_state_dict(torch.load(PATH), strict=False)
+#    modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 #
 # Partially loading a model or loading a partial model are common
 # scenarios when transfer learning or training a new complex model.
@@ -446,7 +446,7 @@
 #
 #    device = torch.device('cpu')
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location=device))
+#    model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 #
 # When loading a model on a CPU that was trained with a GPU, pass
 # ``torch.device('cpu')`` to the ``map_location`` argument in the
@@ -469,7 +469,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
@@ -497,7 +497,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
+#    model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0"))  # Choose whatever GPU device number you want
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py
index 7a2b053763a..de7a178bd7d 100644
--- a/beginner_source/transfer_learning_tutorial.py
+++ b/beginner_source/transfer_learning_tutorial.py
@@ -209,7 +209,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
     print(f'Best val Acc: {best_acc:4f}')
 
     # load best model weights
-    model.load_state_dict(torch.load(best_model_params_path))
+    model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
     return model
diff --git a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
index f16b170ee6a..ed581426c2e 100644
--- a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
+++ b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
@@ -397,7 +397,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    return torch.load(name)
+    return torch.load(name, weights_only=True)
 
 
 ######################################################################
@@ -420,7 +420,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    tensor = torch.load(name)
+    tensor = torch.load(name, weights_only=True)
     os.remove(name)
     return tensor
@@ -462,7 +462,7 @@ def pack_hook(tensor):
     return temp_file
 
 def unpack_hook(temp_file):
-    return torch.load(temp_file.name)
+    return torch.load(temp_file.name, weights_only=True)
 
 
 ######################################################################
diff --git a/intermediate_source/ddp_tutorial.rst b/intermediate_source/ddp_tutorial.rst
index 13297fb2a12..cff5105fa54 100644
--- a/intermediate_source/ddp_tutorial.rst
+++ b/intermediate_source/ddp_tutorial.rst
@@ -214,7 +214,7 @@ and elasticity support, please refer to `TorchElastic <https://pytorch.org/elastic>`_.
-    ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location))
+    ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location, weights_only=True))
diff --git a/recipes_source/recipes/module_load_state_dict_tips.py b/recipes_source/recipes/module_load_state_dict_tips.py
index 17c812b016f..70e9830cb3c 100644
--- a/recipes_source/recipes/module_load_state_dict_tips.py
+++ b/recipes_source/recipes/module_load_state_dict_tips.py
@@ -39,7 +39,7 @@ def forward(self, x):
 # to ``torch.load``, the ``torch.device()`` context manager and the ``assign``
 # keyword argument to ``nn.Module.load_state_dict()``.
 
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 with torch.device('meta'):
     meta_m = SomeModule(1000)
 meta_m.load_state_dict(state_dict, assign=True)
@@ -47,7 +47,7 @@ def forward(self, x):
 #############################################################################
 # Compare the snippet below to the one above:
 
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 m = SomeModule(1000)
 m.load_state_dict(state_dict)
@@ -71,7 +71,7 @@ def forward(self, x):
 # * Waiting for the entire checkpoint to be loaded into RAM before performing, for example, some per-tensor processing.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 end_time = time.time()
 print(f"loading time without mmap={end_time - start_time}")
@@ -84,7 +84,7 @@ def forward(self, x):
 # storages will be memory-mapped.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 end_time = time.time()
 print(f"loading time with mmap={end_time - start_time}")
diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py
index be950e15b13..c59af8821e9 100644
--- a/recipes_source/recipes/save_load_across_devices.py
+++ b/recipes_source/recipes/save_load_across_devices.py
@@ -97,7 +97,7 @@ def forward(self, x):
 # Load
 device = torch.device('cpu')
 model = Net()
-model.load_state_dict(torch.load(PATH, map_location=device))
+model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 
 
 ######################################################################
diff --git a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
index 31b14f3a28a..8c773a14909 100644
--- a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
+++ b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
@@ -131,7 +131,7 @@ def forward(self, x):
 model = Net()
 optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 model.load_state_dict(checkpoint['model_state_dict'])
 optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 epoch = checkpoint['epoch']
diff --git a/recipes_source/recipes/saving_and_loading_models_for_inference.py b/recipes_source/recipes/saving_and_loading_models_for_inference.py
index cd24b77c1de..7adce2a90b5 100644
--- a/recipes_source/recipes/saving_and_loading_models_for_inference.py
+++ b/recipes_source/recipes/saving_and_loading_models_for_inference.py
@@ -117,7 +117,7 @@ def forward(self, x):
 # Load
 model = Net()
-model.load_state_dict(torch.load(PATH))
+model.load_state_dict(torch.load(PATH, weights_only=True))
 model.eval()
 
diff --git a/recipes_source/recipes/saving_multiple_models_in_one_file.py b/recipes_source/recipes/saving_multiple_models_in_one_file.py
index f468d7ac6a1..e938be03b45 100644
--- a/recipes_source/recipes/saving_multiple_models_in_one_file.py
+++ b/recipes_source/recipes/saving_multiple_models_in_one_file.py
@@ -128,7 +128,7 @@ def forward(self, x):
 optimModelA = optim.SGD(modelA.parameters(), lr=0.001, momentum=0.9)
 optimModelB = optim.SGD(modelB.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 modelA.load_state_dict(checkpoint['modelA_state_dict'])
 modelB.load_state_dict(checkpoint['modelB_state_dict'])
 optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
diff --git a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
index 40aeeea9db8..a0752bfc67d 100644
--- a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
+++ b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
@@ -124,7 +124,7 @@ def forward(self, x):
 # are loading into.
 #
 
-netB.load_state_dict(torch.load(PATH), strict=False)
+netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 
 ########################################################################
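One practical consequence of switching these tutorials to ``weights_only=True`` is worth noting when reviewing the patch: checkpoints that contain objects other than tensors (custom classes, schedulers, and so on) will raise an ``UnpicklingError`` unless those types are explicitly allowlisted. The sketch below shows the failure and the ``torch.serialization.add_safe_globals`` escape hatch; it assumes PyTorch 2.4 or later, and the ``LRPolicy`` class and file name are hypothetical.

# Sketch: what weights_only=True blocks, and how to allowlist a
# trusted custom type. Assumes PyTorch >= 2.4; LRPolicy and the
# file name are hypothetical.
import torch
import torch.nn as nn


class LRPolicy:
    """A non-tensor object stored alongside the weights."""
    def __init__(self, gamma):
        self.gamma = gamma


model = nn.Linear(4, 2)
torch.save({'model_state_dict': model.state_dict(),
            'policy': LRPolicy(gamma=0.9)}, 'checkpoint.pth')

# The restricted unpickler refuses arbitrary classes, so a malicious
# checkpoint cannot execute code on load; here it raises UnpicklingError.
try:
    torch.load('checkpoint.pth', weights_only=True)
except Exception as exc:
    print(f'blocked: {exc}')

# Allowlist the type we trust, then the safe loading path succeeds.
torch.serialization.add_safe_globals([LRPolicy])
checkpoint = torch.load('checkpoint.pth', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])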