diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py
index 9cc07a1d956..c8d94789d5d 100644
--- a/advanced_source/dynamic_quantization_tutorial.py
+++ b/advanced_source/dynamic_quantization_tutorial.py
@@ -151,7 +151,8 @@ def tokenize(self, path):
 model.load_state_dict(
     torch.load(
         model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
         )
     )
 
diff --git a/advanced_source/static_quantization_tutorial.rst b/advanced_source/static_quantization_tutorial.rst
index 3b818aa03aa..efb171c0dfe 100644
--- a/advanced_source/static_quantization_tutorial.rst
+++ b/advanced_source/static_quantization_tutorial.rst
@@ -286,7 +286,7 @@ We next define several helper functions to help with model evaluation. These mos
     def load_model(model_file):
         model = MobileNetV2()
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         model.to('cpu')
         return model
diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py
index 07a1be517d1..df7628081ba 100644
--- a/beginner_source/basics/quickstart_tutorial.py
+++ b/beginner_source/basics/quickstart_tutorial.py
@@ -216,7 +216,7 @@ def test(dataloader, model, loss_fn):
 # the state dictionary into it.
 
 model = NeuralNetwork().to(device)
-model.load_state_dict(torch.load("model.pth"))
+model.load_state_dict(torch.load("model.pth", weights_only=True))
 
 #############################################################
 # This model can now be used to make predictions.
diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py
index 16a9f037417..5b3aef124b0 100644
--- a/beginner_source/basics/saveloadrun_tutorial.py
+++ b/beginner_source/basics/saveloadrun_tutorial.py
@@ -32,9 +32,14 @@
 ##########################
 # To load model weights, you need to create an instance of the same model first, and then load the parameters
 # using ``load_state_dict()`` method.
+#
+# In the code below, we set ``weights_only=True`` to limit the
+# functions executed during unpickling to only those necessary for
+# loading weights. Using ``weights_only=True`` is considered
+# a best practice when loading weights.
 
 model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
-model.load_state_dict(torch.load('model_weights.pth'))
+model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
 model.eval()
 
 ###########################
@@ -50,9 +55,14 @@
 torch.save(model, 'model.pth')
 
 ########################
-# We can then load the model like this:
+# We can then load the model as demonstrated below.
+#
+# As described in `Saving and loading torch.nn.Modules <https://pytorch.org/docs/stable/notes/serialization.html#saving-and-loading-torch-nn-modules>`__,
+# saving ``state_dict``s is considered the best practice. However,
+# below we use ``weights_only=False`` because this involves loading the
+# model, which is a legacy use case for ``torch.save``.
 
-model = torch.load('model.pth')
+model = torch.load('model.pth', weights_only=False)
 
 ########################
 # .. note:: This approach uses Python `pickle <https://docs.python.org/3/library/pickle.html>`_ module when serializing the model, thus it relies on the actual class definition to be available when loading the model.
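The two loading styles that the new ``saveloadrun_tutorial.py`` comments describe are worth seeing side by side. The following is an illustrative sketch only, not part of the patch: the file names are hypothetical and torchvision is assumed to be installed.

# Sketch (not part of the patch): the two loading styles described above.
# File names are hypothetical; torchvision is assumed to be installed.
import torch
import torchvision.models as models

# Best practice: persist only the state_dict and load it with
# weights_only=True, which restricts unpickling to tensor data.
model = models.vgg16()
torch.save(model.state_dict(), 'model_weights.pth')

model = models.vgg16()  # recreate the architecture first
model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
model.eval()

# Legacy: pickle the entire module. Loading must run the full
# unpickler (weights_only=False), and the class definition (here
# supplied by torchvision) must be importable at load time.
torch.save(model, 'model.pth')
model = torch.load('model.pth', weights_only=False)
model.eval()

Note that ``weights_only=True`` only fits the state_dict path; the whole-module path has to run the full unpickler, which is exactly why the hunk above passes ``weights_only=False`` there.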
diff --git a/beginner_source/blitz/cifar10_tutorial.py b/beginner_source/blitz/cifar10_tutorial.py
index 8e3f3252921..f38abdd5666 100644
--- a/beginner_source/blitz/cifar10_tutorial.py
+++ b/beginner_source/blitz/cifar10_tutorial.py
@@ -221,7 +221,7 @@ def forward(self, x):
 # wasn't necessary here, we only did it to illustrate how to do so):
 
 net = Net()
-net.load_state_dict(torch.load(PATH))
+net.load_state_dict(torch.load(PATH, weights_only=True))
 
 ########################################################################
 # Okay, now let us see what the neural network thinks these examples above are:
diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py
index 007ad3fd956..9bdf52d84b4 100644
--- a/beginner_source/fgsm_tutorial.py
+++ b/beginner_source/fgsm_tutorial.py
@@ -192,7 +192,7 @@ def forward(self, x):
 model = Net().to(device)
 
 # Load the pretrained model
-model.load_state_dict(torch.load(pretrained_model, map_location=device))
+model.load_state_dict(torch.load(pretrained_model, map_location=device, weights_only=True))
 
 # Set the model in evaluation mode. In this case this is for the Dropout layers
 model.eval()
diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py
index fcd33be2537..6c9b6b1fd77 100644
--- a/beginner_source/saving_loading_models.py
+++ b/beginner_source/saving_loading_models.py
@@ -153,7 +153,7 @@
 # .. code:: python
 #
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.eval()
 #
 # .. note::
@@ -206,7 +206,7 @@
 # .. code:: python
 #
 #    # Model class must be defined somewhere
-#    model = torch.load(PATH)
+#    model = torch.load(PATH, weights_only=False)
 #    model.eval()
 #
 # This save/load process uses the most intuitive syntax and involves the
@@ -290,7 +290,7 @@
 #    model = TheModelClass(*args, **kwargs)
 #    optimizer = TheOptimizerClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    model.load_state_dict(checkpoint['model_state_dict'])
 #    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 #    epoch = checkpoint['epoch']
@@ -354,7 +354,7 @@
 #    optimizerA = TheOptimizerAClass(*args, **kwargs)
 #    optimizerB = TheOptimizerBClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    modelA.load_state_dict(checkpoint['modelA_state_dict'])
 #    modelB.load_state_dict(checkpoint['modelB_state_dict'])
 #    optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -407,7 +407,7 @@
 # .. code:: python
 #
 #    modelB = TheModelBClass(*args, **kwargs)
-#    modelB.load_state_dict(torch.load(PATH), strict=False)
+#    modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 #
 # Partially loading a model or loading a partial model are common
 # scenarios when transfer learning or training a new complex model.
@@ -446,7 +446,7 @@
 #
 #    device = torch.device('cpu')
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location=device))
+#    model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 #
 # When loading a model on a CPU that was trained with a GPU, pass
 # ``torch.device('cpu')`` to the ``map_location`` argument in the
@@ -469,7 +469,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
@@ -497,7 +497,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
+#    model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0"))  # Choose whatever GPU device number you want
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py
index 7a2b053763a..de7a178bd7d 100644
--- a/beginner_source/transfer_learning_tutorial.py
+++ b/beginner_source/transfer_learning_tutorial.py
@@ -209,7 +209,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
     print(f'Best val Acc: {best_acc:4f}')
 
     # load best model weights
-    model.load_state_dict(torch.load(best_model_params_path))
+    model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
     return model
diff --git a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
index f16b170ee6a..ed581426c2e 100644
--- a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
+++ b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
@@ -397,7 +397,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    return torch.load(name)
+    return torch.load(name, weights_only=True)
 
 
 ######################################################################
@@ -420,7 +420,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    tensor = torch.load(name)
+    tensor = torch.load(name, weights_only=True)
     os.remove(name)
     return tensor
@@ -462,7 +462,7 @@ def pack_hook(tensor):
     return temp_file
 
 def unpack_hook(temp_file):
-    return torch.load(temp_file.name)
+    return torch.load(temp_file.name, weights_only=True)
 
 
 ######################################################################
diff --git a/intermediate_source/ddp_tutorial.rst b/intermediate_source/ddp_tutorial.rst
index 13297fb2a12..cff5105fa54 100644
--- a/intermediate_source/ddp_tutorial.rst
+++ b/intermediate_source/ddp_tutorial.rst
@@ -214,7 +214,7 @@ and elasticity support, please refer to `TorchElastic <https://pytorch.org/elastic>`_.
-    ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location))
+    ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location, weights_only=True))
diff --git a/recipes_source/recipes/module_load_state_dict_tips.py b/recipes_source/recipes/module_load_state_dict_tips.py
index 17c812b016f..70e9830cb3c 100644
--- a/recipes_source/recipes/module_load_state_dict_tips.py
+++ b/recipes_source/recipes/module_load_state_dict_tips.py
@@ -39,7 +39,7 @@ def forward(self, x):
 # to ``torch.load``, the ``torch.device()`` context manager and the ``assign``
 # keyword argument to ``nn.Module.load_state_dict()``.
 
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 with torch.device('meta'):
     meta_m = SomeModule(1000)
 meta_m.load_state_dict(state_dict, assign=True)
@@ -47,7 +47,7 @@ def forward(self, x):
 #############################################################################
 # Compare the snippet below to the one above:
 
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 m = SomeModule(1000)
 m.load_state_dict(state_dict)
@@ -71,7 +71,7 @@ def forward(self, x):
 # * Waiting for the entire checkpoint to be loaded into RAM before performing, for example, some per-tensor processing.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 end_time = time.time()
 print(f"loading time without mmap={end_time - start_time}")
@@ -84,7 +84,7 @@ def forward(self, x):
 # storages will be memory-mapped.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 end_time = time.time()
 print(f"loading time with mmap={end_time - start_time}")
diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py
index be950e15b13..c59af8821e9 100644
--- a/recipes_source/recipes/save_load_across_devices.py
+++ b/recipes_source/recipes/save_load_across_devices.py
@@ -97,7 +97,7 @@ def forward(self, x):
 # Load
 device = torch.device('cpu')
 model = Net()
-model.load_state_dict(torch.load(PATH, map_location=device))
+model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 
 
 ######################################################################
diff --git a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
index 31b14f3a28a..8c773a14909 100644
--- a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
+++ b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
@@ -131,7 +131,7 @@ def forward(self, x):
 model = Net()
 optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 model.load_state_dict(checkpoint['model_state_dict'])
 optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 epoch = checkpoint['epoch']
diff --git a/recipes_source/recipes/saving_and_loading_models_for_inference.py b/recipes_source/recipes/saving_and_loading_models_for_inference.py
index cd24b77c1de..7adce2a90b5 100644
--- a/recipes_source/recipes/saving_and_loading_models_for_inference.py
+++ b/recipes_source/recipes/saving_and_loading_models_for_inference.py
@@ -117,7 +117,7 @@ def forward(self, x):
 # Load
 model = Net()
-model.load_state_dict(torch.load(PATH))
+model.load_state_dict(torch.load(PATH, weights_only=True))
 model.eval()
 
diff --git a/recipes_source/recipes/saving_multiple_models_in_one_file.py b/recipes_source/recipes/saving_multiple_models_in_one_file.py
index f468d7ac6a1..e938be03b45 100644
--- a/recipes_source/recipes/saving_multiple_models_in_one_file.py
+++ b/recipes_source/recipes/saving_multiple_models_in_one_file.py
@@ -128,7 +128,7 @@ def forward(self, x):
 optimModelA = optim.SGD(modelA.parameters(), lr=0.001, momentum=0.9)
 optimModelB = optim.SGD(modelB.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 modelA.load_state_dict(checkpoint['modelA_state_dict'])
 modelB.load_state_dict(checkpoint['modelB_state_dict'])
 optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
diff --git a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
index 40aeeea9db8..a0752bfc67d 100644
--- a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
+++ b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
@@ -124,7 +124,7 @@ def forward(self, x):
 # are loading into.
 #
 
-netB.load_state_dict(torch.load(PATH), strict=False)
+netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 
 ########################################################################
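One practical consequence of switching these tutorials to ``weights_only=True`` is worth noting when reviewing the patch: checkpoints that contain objects other than tensors (custom classes, schedulers, and so on) will raise an ``UnpicklingError`` unless those types are explicitly allowlisted. The sketch below shows the failure and the ``torch.serialization.add_safe_globals`` escape hatch; it assumes PyTorch 2.4 or later, and the ``LRPolicy`` class and file name are hypothetical.

# Sketch: what weights_only=True blocks, and how to allowlist a
# trusted custom type. Assumes PyTorch >= 2.4; LRPolicy and the
# file name are hypothetical.
import torch
import torch.nn as nn


class LRPolicy:
    """A non-tensor object stored alongside the weights."""
    def __init__(self, gamma):
        self.gamma = gamma


model = nn.Linear(4, 2)
torch.save({'model_state_dict': model.state_dict(),
            'policy': LRPolicy(gamma=0.9)}, 'checkpoint.pth')

# The restricted unpickler refuses arbitrary classes, so a malicious
# checkpoint cannot execute code on load; here it raises UnpicklingError.
try:
    torch.load('checkpoint.pth', weights_only=True)
except Exception as exc:
    print(f'blocked: {exc}')

# Allowlist the type we trust, then the safe loading path succeeds.
torch.serialization.add_safe_globals([LRPolicy])
checkpoint = torch.load('checkpoint.pth', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])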